Merge branch 'main' into worktree-investigate-468

Resolve conflicts: auto.ts (keep debug-logger imports alongside
extracted auto-recovery/dashboard/dispatch/supervisor modules),
files.ts (merge native parser functions with debug timing), index.ts
(keep debugLog import, add loadToolApiKeys, include knowledgeBlock in
fullSystem), preferences.ts (keep #468 indexOf fix with export and
\r\n support).
This commit is contained in:
Flux Labs 2026-03-16 07:33:02 -05:00
commit eb196772d1
195 changed files with 21446 additions and 4374 deletions

107
.github/ISSUE_TEMPLATE/bug_report.yml vendored Normal file
View file

@ -0,0 +1,107 @@
name: Bug Report
description: Report a bug in GSD
labels: ["bug"]
body:
- type: markdown
attributes:
value: |
Thanks for reporting a bug! Please fill out the sections below so we can reproduce and fix it.
- type: input
id: version
attributes:
label: GSD version
description: Run `gsd --version` or check `package.json`
placeholder: "e.g., 2.15.0"
validations:
required: true
- type: dropdown
id: area
attributes:
label: Affected area
options:
- Auto-mode / dispatch loop
- TUI / terminal display
- Planning / roadmap
- Phase execution
- Git / worktree isolation
- Hook orchestration
- State management
- AI provider integration
- CLI / commands
- Other
validations:
required: true
- type: textarea
id: description
attributes:
label: What happened?
description: A clear description of the bug.
placeholder: "When I run X, Y happens instead of Z."
validations:
required: true
- type: textarea
id: expected
attributes:
label: Expected behavior
description: What should have happened instead?
validations:
required: true
- type: textarea
id: reproduce
attributes:
label: Steps to reproduce
description: Minimal steps to trigger the bug.
placeholder: |
1. Run `gsd ...`
2. Select option ...
3. See error
validations:
required: true
- type: textarea
id: logs
attributes:
label: Error output / logs
description: Paste any error messages or relevant log output.
render: shell
- type: dropdown
id: os
attributes:
label: Operating system
options:
- macOS
- Linux
- Windows
- Other
validations:
required: true
- type: input
id: node-version
attributes:
label: Node.js version
description: Run `node --version`
placeholder: "e.g., v22.4.0"
- type: dropdown
id: ai-provider
attributes:
label: AI provider (if relevant)
options:
- Anthropic (Claude)
- OpenRouter
- OpenAI-compatible
- Other
- N/A
- type: textarea
id: context
attributes:
label: Additional context
description: Anything else — screenshots, config snippets, related issues.

49
.github/PULL_REQUEST_TEMPLATE.md vendored Normal file
View file

@ -0,0 +1,49 @@
## Summary
<!-- What does this PR do? 1-3 bullet points. -->
-
## Motivation
<!-- Why is this change needed? Link issues if applicable. -->
Closes #
## Change type
<!-- Check one: -->
- [ ] `feat` — New feature or capability
- [ ] `fix` — Bug fix
- [ ] `refactor` — Code restructuring (no behavior change)
- [ ] `test` — Adding or updating tests
- [ ] `docs` — Documentation only
- [ ] `chore` — Build, CI, or tooling changes
## Scope
<!-- Which packages/areas does this touch? Check all that apply. -->
- [ ] `pi-tui` — Terminal UI
- [ ] `pi-ai` — AI/LLM layer
- [ ] `pi-agent-core` — Agent orchestration
- [ ] `pi-coding-agent` — Coding agent
- [ ] `gsd extension` — GSD workflow (`src/resources/extensions/gsd/`)
- [ ] `native` — Native bindings
- [ ] `ci/build` — Workflows, scripts, config
## Breaking changes
<!-- Does this change any public API, CLI behavior, config format, or file structure? -->
- [ ] No breaking changes
- [ ] Yes — describe below:
## Test plan
<!-- How was this tested? Check all that apply. -->
- [ ] Unit tests added/updated (`npm run test:unit`)
- [ ] Integration tests added/updated (`npm run test:integration`)
- [ ] Manual testing — describe steps:
- [ ] No tests needed — explain why:
## Rollback plan
<!-- If this causes issues in production, how do we revert safely? -->
- [ ] Safe to revert (no migrations, no state changes)
- [ ] Requires steps — describe:
## Release context
<!-- What branch does this PR target? -->
- **Target**: <!-- e.g., milestone/2.15.x or main -->

View file

@ -124,6 +124,20 @@ jobs:
- name: Sync platform package versions
run: node native/scripts/sync-platform-versions.cjs
- name: Detect prerelease version
id: version-check
run: |
VERSION=$(node -p "require('./package.json').version")
if echo "$VERSION" | grep -q '-next\.'; then
echo "is_prerelease=true" >> "$GITHUB_OUTPUT"
echo "tag_flag=--tag next" >> "$GITHUB_OUTPUT"
echo "Prerelease detected: ${VERSION} → publishing with --tag next"
else
echo "is_prerelease=false" >> "$GITHUB_OUTPUT"
echo "tag_flag=" >> "$GITHUB_OUTPUT"
echo "Stable release: ${VERSION} → publishing with --tag latest (default)"
fi
- name: Publish platform packages
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
@ -131,7 +145,7 @@ jobs:
for platform in darwin-arm64 darwin-x64 linux-x64-gnu linux-arm64-gnu win32-x64-msvc; do
echo "Publishing @gsd-build/engine-${platform}..."
cd "native/npm/${platform}"
OUTPUT=$(npm publish --access public 2>&1) && echo "$OUTPUT" || {
OUTPUT=$(npm publish --access public ${{ steps.version-check.outputs.tag_flag }} 2>&1) && echo "$OUTPUT" || {
if echo "$OUTPUT" | grep -q "cannot publish over the previously published"; then
echo "Already published, skipping"
else
@ -183,7 +197,7 @@ jobs:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
# --ignore-scripts: skip prepublishOnly since we built explicitly above
OUTPUT=$(npm publish --ignore-scripts 2>&1) && echo "$OUTPUT" || {
OUTPUT=$(npm publish --ignore-scripts ${{ steps.version-check.outputs.tag_flag }} 2>&1) && echo "$OUTPUT" || {
if echo "$OUTPUT" | grep -q "cannot publish over the previously published\|You cannot publish over"; then
echo "Already published, skipping"
else

View file

@ -28,6 +28,9 @@ jobs:
- name: Build
run: npm run build
- name: Typecheck extensions
run: npm run typecheck:extensions
- name: Validate package is installable
run: npm run validate-pack
@ -58,5 +61,8 @@ jobs:
- name: Build
run: npm run build
- name: Typecheck extensions
run: npm run typecheck:extensions
- name: Run unit tests
run: npm run test:unit

1
.gitignore vendored
View file

@ -13,6 +13,7 @@
.gsd/milestones/**/continue.md
.claude/
RELEASE-GUIDE.md
*.tgz
.DS_Store
Thumbs.db

View file

@ -50,3 +50,6 @@
| D042 | M003/S04 | pattern | shouldUseWorktreeIsolation override parameter | Accept optional overridePrefs for testability | loadEffectiveGSDPreferences computes PROJECT_PREFERENCES_PATH at module load time from process.cwd(). chdir-based test fixtures cannot influence it. Override parameter enables reliable testing. | Yes — if preference loading becomes dynamic |
| D043 | M003/S04 | pattern | validatePreferences exported | Export from preferences.ts for direct test access | Was module-private. Tests need to call it directly without full file-loading pipeline. No downstream consumers affected. | No |
| D044 | M003/S05 | pattern | Self-heal strategy for merge failures | Detect real conflicts immediately (skip retry), retry only transient failures once | Real conflicts will fail identically on retry — wasting time. Transient failures (stale index, leftover merge state) recover after abort+reset. Fast escalation for conflicts, automatic recovery for everything else. | Yes — if retry proves useful for some conflict types |
| D045 | M004 | arch | SQLite provider strategy | Tiered chain: node:sqlite → better-sqlite3 → null | node:sqlite available on Node 22.5+ (our target), better-sqlite3 as fallback for older Node, null for graceful degradation. DbAdapter normalizes API differences. | Yes — if node:sqlite stabilizes and better-sqlite3 path can be dropped |
| D046 | M004 | arch | createWorktree sync/async for DB copy | Keep synchronous, use copyFileSync | Memory-db made createWorktree async for dynamic imports, but copyWorktreeDb is purely sync (copyFileSync). Static import + isDbAvailable() guard avoids async cascade through createAutoWorktree and auto.ts call sites. | No |
| D047 | M004 | arch | Port strategy | Adapt to current architecture, not blind merge | 145 commits divergence, auto.ts decomposed into 6 modules. Memory-db code is reference — capabilities ported into current file structure (auto-prompts.ts, auto-dispatch.ts, etc.), not cherry-picked. | No |

View file

@ -2,7 +2,7 @@
## What This Is
A pi coding agent extension (GSD — "Get Stuff Done") that provides structured planning, auto-mode execution, and project management for autonomous coding sessions. Includes proactive secret management, browser automation tools for UI verification, and worktree-isolated git architecture for zero-friction autonomous execution.
A pi coding agent extension (GSD — "Get Stuff Done") that provides structured planning, auto-mode execution, and project management for autonomous coding sessions. Includes proactive secret management, browser automation tools for UI verification, worktree-isolated git architecture for zero-friction autonomous execution, and SQLite-backed surgical context injection for token-efficient prompt assembly.
## Core Value
@ -21,11 +21,13 @@ The GSD extension is fully functional with:
- Worktree-isolated git architecture: auto-worktree per milestone, --no-ff slice merges, milestone squash to main, preference-gated isolation modes, self-healing git repair, doctor git health checks, full e2e test coverage
- Auto-worktree lifecycle: `auto-worktree.ts` module creates isolated worktrees per milestone (`milestone/<MID>` branches), wired into auto.ts startAuto/resume/stop with split-brain prevention
- Branch-per-slice git model with squash merge to main (legacy mode, supported via `git.isolation: "branch"` preference)
- Decomposed auto-mode: `auto-prompts.ts` (prompt builders), `auto-dispatch.ts` (unit→prompt routing), `auto-recovery.ts` (timeout/crash recovery), `auto-worktree.ts` (worktree lifecycle)
## Architecture / Key Patterns
- **Extension model**: pi extensions register tools, commands, hooks via `ExtensionAPI`
- **State machine**: `auto.ts` drives `dispatchNextUnit()` which reads disk state and dispatches fresh sessions
- **Dispatch pipeline**: `auto-dispatch.ts` resolves phase → unit type + prompt via `resolveDispatch()`. Prompt builders live in `auto-prompts.ts`.
- **Secrets gate**: `startAuto()` checks `getManifestStatus()` before first dispatch
- **Disk-driven state**: `.gsd/` files are the source of truth, `STATE.md` is derived cache
- **File parsing**: `files.ts` has markdown parsers for all GSD file types
@ -43,3 +45,4 @@ See `.gsd/REQUIREMENTS.md` for the explicit capability contract, requirement sta
- [x] M001: Proactive Secret Management — Front-loaded API key collection into planning so auto-mode runs uninterrupted (10 requirements validated)
- [x] M002: Browser Tools Performance & Intelligence — Module decomposition, action pipeline optimization, sharp-based screenshots, form intelligence, intent-ranked retrieval, semantic actions, 108-test suite (12 requirements validated)
- [x] M003: Worktree-Isolated Git Architecture — Auto-worktree per milestone, --no-ff slice merges, milestone squash to main, preferences + backwards compat, self-healing git repair, doctor health checks, full e2e test suite (13 requirements validated)
- [ ] M004: SQLite Context Store — Surgical context injection via SQLite-backed query layer, replacing whole-file prompt dumps with scoped DB queries for ≥30% token savings

View file

@ -4,7 +4,148 @@ This file is the explicit capability and coverage contract for the project.
## Active
(No active requirements — all M003 requirements validated.)
### R045 — SQLite DB layer with tiered provider chain
- Class: core-capability
- Status: active
- Description: A SQLite abstraction layer that tries `node:sqlite` (Node 22.5+), falls back to `better-sqlite3`, then to null. A thin `DbAdapter` interface normalizes API differences. Schema init creates decisions, requirements, artifacts tables plus filtered views. WAL mode on file-backed databases.
- Why it matters: The foundation for surgical context injection. Without a queryable store, prompts must dump entire files.
- Source: execution (memory-db port)
- Primary owning slice: M004/S01
- Supporting slices: none
- Validation: unmapped
- Notes: Port from memory-db worktree `gsd-db.ts`. Tiered provider chain proven on Node 22.20.0. `node:sqlite` returns null-prototype rows — DbAdapter normalizes via spread.
### R046 — Graceful degradation when SQLite unavailable
- Class: continuity
- Status: active
- Description: When no SQLite provider loads, all query functions return empty results and all prompt builders fall back to `inlineGsdRootFile` filesystem loading. No crash, no visible error.
- Why it matters: SQLite must be optional. Users on exotic platforms or old Node versions must not be blocked.
- Source: execution (memory-db port)
- Primary owning slice: M004/S01
- Supporting slices: M004/S03
- Validation: unmapped
- Notes: Every query function guards with `isDbAvailable()` + try/catch. Every prompt builder falls back to existing `inlineGsdRootFile`.
### R047 — Auto-migration from markdown to DB on first run
- Class: core-capability
- Status: active
- Description: When auto-mode starts on a project with `.gsd/` markdown files but no `gsd.db`, silently import all artifact types into a fresh DB. Idempotent — safe to re-run.
- Why it matters: Existing projects must transparently gain DB benefits without manual migration.
- Source: execution (memory-db port)
- Primary owning slice: M004/S02
- Supporting slices: M004/S01
- Validation: unmapped
- Notes: Port from memory-db `md-importer.ts`. Custom parsers for DECISIONS.md pipe-table format and REQUIREMENTS.md section/bullet format. Hierarchy walker for milestones → slices → tasks.
### R048 — Round-trip fidelity for all artifact types
- Class: quality-attribute
- Status: active
- Description: Importing markdown into DB and regenerating markdown produces field-identical output. No data loss, no format drift.
- Why it matters: Dual-write means DB→markdown generation must be faithful. Format drift corrupts the human-readable artifacts.
- Source: execution (memory-db port)
- Primary owning slice: M004/S02
- Supporting slices: M004/S06
- Validation: unmapped
- Notes: Port from memory-db. Custom parsers and generators must produce/consume identical formats.
### R049 — Surgical prompt injection via DB queries
- Class: core-capability
- Status: active
- Description: All prompt builders in `auto-prompts.ts` use scoped DB queries instead of whole-file `inlineGsdRootFile` for decisions, requirements, and project context. Decisions filtered by milestone, requirements filtered by slice ownership.
- Why it matters: This is the core value — smaller, more relevant prompts mean better agent reasoning and fewer wasted tokens.
- Source: user
- Primary owning slice: M004/S03
- Supporting slices: M004/S01, M004/S02
- Validation: unmapped
- Notes: Port from memory-db DB-aware helpers. Must be rewired into current `auto-prompts.ts` (not the old monolithic auto.ts). 19 `inlineGsdRootFile` calls to replace across 11 prompt builders.
### R050 — Dual-write keeping markdown and DB in sync
- Class: continuity
- Status: active
- Description: After each dispatch unit completes and auto-commits, re-import modified markdown files into the DB. Structured LLM tools write to DB first, then regenerate markdown. Both directions stay synchronized.
- Why it matters: Markdown files are the human-readable source of truth. The DB is the query index. They must agree.
- Source: execution (memory-db port)
- Primary owning slice: M004/S03
- Supporting slices: M004/S06
- Validation: unmapped
- Notes: Re-import in `handleAgentEnd` after auto-commit. DB-first write in structured tools triggers markdown generation.
### R051 — Token measurement with before/after comparison
- Class: operability
- Status: active
- Description: `promptCharCount` and `baselineCharCount` fields added to `UnitMetrics`. Measurement wired into all `snapshotUnitMetrics` call sites. Baseline = full markdown content. Prompt = DB-scoped content. Difference = token savings.
- Why it matters: Proves the ≥30% savings claim with real data. Enables ongoing monitoring of prompt efficiency.
- Source: execution (memory-db port)
- Primary owning slice: M004/S04
- Supporting slices: M004/S03
- Validation: unmapped
- Notes: Port from memory-db. Module-scoped measurement vars reset at top of `dispatchNextUnit`.
### R052 — DB-first state derivation with filesystem fallback
- Class: core-capability
- Status: active
- Description: `deriveState()` queries the artifacts table for file content when DB is available, replacing the batch file-parse step. File discovery still uses disk. Falls back to filesystem when DB unavailable.
- Why it matters: Faster state derivation on large projects. Consistent with DB-first architecture.
- Source: execution (memory-db port)
- Primary owning slice: M004/S04
- Supporting slices: M004/S01, M004/S02
- Validation: unmapped
- Notes: Port from memory-db. File discovery (which milestones/slices/tasks exist) stays on disk. Only content loading switches to DB.
### R053 — Worktree DB copy on creation
- Class: integration
- Status: active
- Description: When a worktree is created, copy `gsd.db` from the source project into the worktree's `.gsd/` directory. Skip WAL/SHM files. Non-fatal on failure.
- Why it matters: Worktrees need their own DB with the project's current state. Without a copy, the worktree starts with no DB context.
- Source: execution (memory-db port)
- Primary owning slice: M004/S05
- Supporting slices: M004/S01
- Validation: unmapped
- Notes: Port from memory-db `copyWorktreeDb`. Keep `createWorktree` synchronous — `copyFileSync` is sufficient. Guard with `isDbAvailable()`.
### R054 — Worktree DB merge reconciliation
- Class: integration
- Status: active
- Description: When a worktree merges back (slice or milestone), ATTACH the worktree's DB and reconcile rows: INSERT OR REPLACE in a transaction with conflict detection by content column comparison.
- Why it matters: The worktree may have added decisions, requirements, or artifacts that the main DB doesn't have.
- Source: execution (memory-db port)
- Primary owning slice: M004/S05
- Supporting slices: M004/S01
- Validation: unmapped
- Notes: Port from memory-db `reconcileWorktreeDb`. ATTACH/DETACH pattern with try/finally for cleanup.
### R055 — Structured LLM tools for decisions/requirements/summaries
- Class: core-capability
- Status: active
- Description: Three tools registered: `gsd_save_decision` (auto-assigns D-numbers, writes to DB + regenerates DECISIONS.md), `gsd_update_requirement` (verifies existence, updates DB + regenerates REQUIREMENTS.md), `gsd_save_summary` (writes artifact to DB + disk).
- Why it matters: Eliminates the markdown-then-parse roundtrip. LLM writes structured data directly, guaranteeing parseable output.
- Source: execution (memory-db port)
- Primary owning slice: M004/S06
- Supporting slices: M004/S03
- Validation: unmapped
- Notes: Port from memory-db. DB-first write pattern: upsert → fetch all → generate markdown → write file.
### R056 — /gsd inspect command for DB diagnostics
- Class: operability
- Status: active
- Description: A `/gsd inspect` slash command that dumps schema version, table row counts, and recent entries from each table.
- Why it matters: When things go wrong, the user needs visibility into DB state without running raw SQL.
- Source: execution (memory-db port)
- Primary owning slice: M004/S06
- Supporting slices: M004/S01
- Validation: unmapped
- Notes: Port from memory-db. Autocomplete for subcommands (decisions, requirements, artifacts, all).
### R057 — ≥30% token savings on planning/research dispatches
- Class: quality-attribute
- Status: active
- Description: Surgical prompt injection delivers ≥30% fewer prompt characters compared to whole-file loading, measured on mature projects with multiple milestones, decisions, and requirements.
- Why it matters: The primary user-visible value of the entire DB architecture. If savings aren't real, the complexity isn't justified.
- Source: user
- Primary owning slice: M004/S07
- Supporting slices: M004/S03, M004/S04
- Validation: unmapped
- Notes: Memory-db proved: 52.2% plan-slice, 66.3% decisions-only, 32.2% research composite, 42.4% lifecycle. Must re-prove against current codebase.
## Validated
@ -516,11 +657,24 @@ This file is the explicit capability and coverage contract for the project.
| R042 | core-capability | deferred | none | none | unmapped |
| R043 | quality-attribute | deferred | none | none | unmapped |
| R044 | anti-feature | out-of-scope | none | none | n/a |
| R045 | core-capability | active | M004/S01 | none | unmapped |
| R046 | continuity | active | M004/S01 | M004/S03 | unmapped |
| R047 | core-capability | active | M004/S02 | M004/S01 | unmapped |
| R048 | quality-attribute | active | M004/S02 | M004/S06 | unmapped |
| R049 | core-capability | active | M004/S03 | M004/S01, M004/S02 | unmapped |
| R050 | continuity | active | M004/S03 | M004/S06 | unmapped |
| R051 | operability | active | M004/S04 | M004/S03 | unmapped |
| R052 | core-capability | active | M004/S04 | M004/S01, M004/S02 | unmapped |
| R053 | integration | active | M004/S05 | M004/S01 | unmapped |
| R054 | integration | active | M004/S05 | M004/S01 | unmapped |
| R055 | core-capability | active | M004/S06 | M004/S03 | unmapped |
| R056 | operability | active | M004/S06 | M004/S01 | unmapped |
| R057 | quality-attribute | active | M004/S07 | M004/S03, M004/S04 | unmapped |
## Coverage Summary
- Active requirements: 0
- Mapped to slices: 0
- Active requirements: 13
- Mapped to slices: 13
- Validated: 35
- Deferred: 5
- Out of scope: 4

View file

@ -0,0 +1,126 @@
# M004: SQLite Context Store — Surgical Prompt Injection
**Gathered:** 2026-03-15
**Status:** Ready for planning
## Project Description
Port the completed memory-db worktree's SQLite-backed context store into the current GSD codebase. The memory-db work (7 slices, 21 requirements validated, 293 tests) was built against a pre-v2.12.0 codebase that has since diverged significantly — 145 commits on main including auto.ts decomposition, worktree architecture overhaul, and extensive refactoring. This is a port, not a merge.
## Why This Milestone
The current prompt assembly dumps entire files (DECISIONS.md, REQUIREMENTS.md, PROJECT.md) into every dispatch prompt regardless of relevance. On a mature project with 40+ decisions and 30+ requirements, most of that context is irrelevant to the active slice. A SQLite query layer enables surgical injection — only the decisions scoped to this milestone, only the requirements owned by this slice. The user's emphasis: "super fast context ingestion" — the DB is the mechanism for being "very, very surgically" selective about what context each task sees.
## User-Visible Outcome
### When this milestone is complete, the user can:
- Run auto-mode and see ≥30% smaller prompts with only relevant context injected
- Use `gsd_save_decision`, `gsd_update_requirement`, `gsd_save_summary` tool calls that bypass markdown parsing
- Run `/gsd inspect` to see DB state for diagnostics
- Start auto-mode on an existing project and have gsd.db appear silently with all artifacts imported
### Entry point / environment
- Entry point: `/gsd auto` CLI command, structured LLM tools during dispatch, `/gsd inspect` slash command
- Environment: local dev (Node 22.5+, runs in pi agent process)
- Live dependencies involved: none (SQLite is embedded, no external services)
## Completion Class
- Contract complete means: DB opens, queries return scoped data, prompt builders use DB queries, tests pass
- Integration complete means: full auto-mode cycle runs with DB-backed context injection, dual-write keeps markdown in sync, worktree lifecycle copies/reconciles DB
- Operational complete means: existing projects migrate transparently, graceful fallback when SQLite unavailable, token savings measured and ≥30%
## Final Integrated Acceptance
To call this milestone complete, we must prove:
- A full auto-mode dispatch cycle (research → plan → execute → complete) produces correct prompts with scoped context from the DB
- An existing project with markdown artifacts silently migrates to DB on first run with zero data loss
- Token measurement shows ≥30% savings on planning/research units
- The system works identically (via fallback) when SQLite is unavailable
- TypeScript compiles clean, all existing tests pass, new DB test suite passes
## Risks and Unknowns
- `auto-prompts.ts` has 11 prompt builders with 19 `inlineGsdRootFile` calls — rewiring must preserve existing prompt structure and fallback behavior
- `handleAgentEnd` in `auto.ts` has new post-unit-hook machinery since memory-db was built — dual-write re-import must integrate without disrupting hooks/doctor/rebuildState sequence
- `worktree-manager.ts` `createWorktree` is sync on main — DB copy must work synchronously (decision: use `copyFileSync`, keep sync)
- `node:sqlite` is experimental in Node 22 — API could change, but the DbAdapter abstraction insulates against this
- Memory-db's markdown parsers for DECISIONS.md and REQUIREMENTS.md are custom (not using `files.ts`) — must verify they handle current file formats
## Existing Codebase / Prior Art
- `src/resources/extensions/gsd/auto-prompts.ts` — 880 lines, 11 `build*Prompt()` functions, 19 `inlineGsdRootFile` calls. This is where surgical injection happens.
- `src/resources/extensions/gsd/auto-dispatch.ts` — `resolveDispatch()` maps units to prompt builders. Imports from `auto-prompts.ts`.
- `src/resources/extensions/gsd/auto.ts` — `startAuto()`, `handleAgentEnd()`, `dispatchNextUnit()`. DB init/migration goes in startup, re-import in handleAgentEnd.
- `src/resources/extensions/gsd/state.ts` — `deriveState()` — 587 lines. DB-first content loading replaces batch file parse.
- `src/resources/extensions/gsd/metrics.ts` — `UnitMetrics` interface, `snapshotUnitMetrics()`. Add `promptCharCount`/`baselineCharCount`.
- `src/resources/extensions/gsd/worktree-manager.ts` — `createWorktree()` (sync), `mergeWorktreeToMain()`. DB copy/reconcile hooks here.
- `src/resources/extensions/gsd/index.ts` — tool registrations. 3 new structured tools.
- `src/resources/extensions/gsd/commands.ts` — slash command registration. `/gsd inspect`.
- `src/resources/extensions/gsd/types.ts` — needs Decision/Requirement interfaces.
- `.gsd/worktrees/memory-db/` — the source worktree with all memory-db implementation. Reference code lives here.
### Memory-db source modules to port:
- `.gsd/worktrees/memory-db/src/resources/extensions/gsd/gsd-db.ts` — 750 lines, SQLite abstraction layer
- `.gsd/worktrees/memory-db/src/resources/extensions/gsd/context-store.ts` — 195 lines, query layer + formatters
- `.gsd/worktrees/memory-db/src/resources/extensions/gsd/md-importer.ts` — 526 lines, markdown parsers + migration orchestrator
- `.gsd/worktrees/memory-db/src/resources/extensions/gsd/db-writer.ts` — 337 lines, DB→markdown generators + DB-first write helpers
- `.gsd/worktrees/memory-db/src/resources/extensions/gsd/tests/` — 13 test files covering all DB capabilities
> See `.gsd/DECISIONS.md` for all architectural and pattern decisions — it is an append-only register; read it during planning, append to it during execution.
## Relevant Requirements
- R045–R057 — all 13 active requirements map to this milestone's 7 slices
## Scope
### In Scope
- SQLite DB layer with tiered provider chain (node:sqlite → better-sqlite3 → null)
- Auto-migration from markdown files to DB
- Surgical prompt injection via DB queries in all prompt builders
- Dual-write keeping markdown and DB in sync (both directions)
- Token measurement with before/after comparison in UnitMetrics
- DB-first state derivation in deriveState()
- Worktree DB copy on creation and merge reconciliation
- 3 structured LLM tools (gsd_save_decision, gsd_update_requirement, gsd_save_summary)
- /gsd inspect slash command
- Full test suite for all DB capabilities
### Out of Scope / Non-Goals
- Vector/embedding search on artifacts (deferred — schema supports future extension)
- DB export/dump command
- Changing file discovery in deriveState (stays on disk)
- Making createWorktree async (keep sync, use copyFileSync for DB copy)
## Technical Constraints
- `node:sqlite` is experimental — use DbAdapter abstraction to insulate
- `node:sqlite` returns null-prototype rows — normalize via spread in DbAdapter
- Named SQL parameters must use colon-prefix (`:id`, `:scope`) for `node:sqlite` compatibility
- `createWorktree` must remain synchronous — no async cascade
- All DB operations must be wrapped in try/catch with fallback to existing behavior
- Memory-db source code is reference — adapt to current architecture, don't copy blindly
## Integration Points
- `auto-prompts.ts` — replace `inlineGsdRootFile` with DB-aware helpers (scoped queries with filesystem fallback)
- `auto.ts` `startAuto()` — DB open + auto-migration before first dispatch
- `auto.ts` `handleAgentEnd()` — re-import markdown after auto-commit (after doctor + rebuildState, before dispatch)
- `metrics.ts` — extend `UnitMetrics` with measurement fields, extend `snapshotUnitMetrics` signature
- `state.ts` `deriveState()` — DB-first content loading with filesystem fallback
- `worktree-manager.ts` `createWorktree()` — sync DB copy after worktree creation
- `worktree-command.ts` / merge paths — DB reconciliation after merge
- `index.ts` — 3 new tool registrations
- `commands.ts` — `/gsd inspect` command registration
- `types.ts` — Decision/Requirement interface additions
## Open Questions
- Whether memory-db's custom DECISIONS.md parser handles the current format (pipe tables with supersession chains) — needs verification during S02 implementation
- Whether current `deriveState()` batch-parse logic is structurally compatible with the DB-first replacement — needs verification during S04

View file

@ -0,0 +1,3 @@
{
"integrationBranch": "main"
}

View file

@ -0,0 +1,197 @@
# M004: SQLite Context Store — Surgical Prompt Injection
**Vision:** Replace GSD's whole-file prompt dumps with a SQLite-backed query layer that surgically injects only the context each dispatch unit needs — delivering ≥30% token savings, eliminating context pollution, and enabling structured LLM output that bypasses fragile markdown parsing.
## Success Criteria
- All prompt builders use DB queries for context injection (zero direct `inlineGsdRootFile` for data artifacts in prompt builders)
- Existing GSD projects migrate silently to DB on first run with zero data loss
- Planning and research dispatch units show ≥30% fewer prompt characters on mature projects
- System works identically via fallback when SQLite unavailable — no crash, transparent degradation
- Worktree creation copies gsd.db; worktree merge reconciles rows
- LLM can write decisions/requirements/summaries via structured tool calls
- `/gsd inspect` shows DB state for debugging
- Dual-write keeps markdown files in sync with DB state in both directions
- `deriveState()` reads from DB when available, falls back to filesystem
- All existing tests continue to pass, TypeScript compiles clean
## Key Risks / Unknowns
- `auto-prompts.ts` has 11 prompt builders with 19 `inlineGsdRootFile` calls — rewiring is high-surface-area
- `handleAgentEnd` has new post-unit-hook/doctor/rebuildState machinery — dual-write re-import must integrate cleanly
- Memory-db's custom markdown parsers may not handle format changes since the fork point
- `node:sqlite` is experimental — API stability risk (mitigated by DbAdapter abstraction)
## Proof Strategy
- SQLite provider risk → retire in S01 by proving tiered chain loads and queries on target platform
- Parser/format risk → retire in S02 by round-trip testing every artifact type against current file formats
- Prompt builder rewiring risk → retire in S03 by verifying all 11 builders produce correct output with DB vs markdown
- Worktree integration risk → retire in S05 by testing copy/reconcile against current worktree architecture
## Verification Classes
- Contract verification: unit tests for DB layer, importers, query layer, state derivation, writer, tools. Round-trip fidelity tests for migration.
- Integration verification: prompt builders produce equivalent output with DB vs markdown. Full auto-mode cycle completes. Worktree DB copy/merge works.
- Operational verification: graceful fallback when SQLite unavailable. Token measurement reports savings ≥30%.
- UAT / human verification: user runs auto-mode on a real project and confirms output quality equivalent or better
## Milestone Definition of Done
This milestone is complete only when all are true:
- All prompt builders in `auto-prompts.ts` use DB queries for context injection
- Silent auto-migration works on existing GSD projects with all artifact types
- Dual-write keeps markdown files in sync with DB state (both directions)
- Graceful fallback to markdown when SQLite unavailable
- Token measurement shows ≥30% reduction on planning/research units
- `deriveState()` derives from DB, producing identical GSDState output
- Worktree DB copy and merge reconciliation work with current worktree architecture
- Structured LLM tools registered and functional with DB-first write
- `/gsd inspect` command works
- All existing tests pass, new DB test suite passes, `npx tsc --noEmit` clean
- Success criteria re-checked against live behavior
## Requirement Coverage
- Covers: R045, R046, R047, R048, R049, R050, R051, R052, R053, R054, R055, R056, R057
- Partially covers: none
- Leaves for later: none
- Orphan risks: none
## Slices
- [ ] **S01: DB Foundation + Schema** `risk:high` `depends:[]`
> After this: SQLite DB opens with tiered provider chain, schema inits with decisions/requirements/artifacts tables plus filtered views, typed CRUD wrappers work, graceful fallback returns empty results when SQLite unavailable. Proven by unit tests against real DB.
- [ ] **S02: Markdown Importers + Auto-Migration** `risk:medium` `depends:[S01]`
> After this: Existing GSD project with markdown files starts up → gsd.db appears silently with all artifact types imported. Round-trip fidelity proven for every artifact type — import then regenerate produces identical output.
- [ ] **S03: Surgical Prompt Injection + Dual-Write** `risk:high` `depends:[S01,S02]`
> After this: All 11 `build*Prompt()` functions in `auto-prompts.ts` use scoped DB queries instead of `inlineGsdRootFile`. Decisions filtered by milestone, requirements filtered by slice. Dual-write re-import in `handleAgentEnd` keeps DB in sync after each dispatch unit. Falls back to filesystem when DB unavailable.
- [ ] **S04: Token Measurement + State Derivation** `risk:medium` `depends:[S03]`
> After this: `promptCharCount`/`baselineCharCount` in UnitMetrics, measurement wired into all `snapshotUnitMetrics` call sites. `deriveState()` reads content from DB when available. Savings ≥30% confirmed on fixture data.
- [ ] **S05: Worktree DB Isolation** `risk:medium` `depends:[S01,S02]`
> After this: `createWorktree` copies gsd.db to new worktrees (sync, non-fatal). Merge paths reconcile worktree DB rows back via ATTACH DATABASE with conflict detection.
- [ ] **S06: Structured LLM Tools + /gsd inspect** `risk:medium` `depends:[S03]`
> After this: LLM writes decisions/requirements/summaries via tool calls that write to DB first, then regenerate markdown. `/gsd inspect` dumps schema version, table counts, recent entries.
- [ ] **S07: Integration Verification + Polish** `risk:low` `depends:[S03,S04,S05,S06]`
> After this: Full auto-mode lifecycle test proves all subsystems compose correctly — migration → scoped queries → formatted prompts → token savings → re-import → round-trip. Edge cases (empty projects, partial migrations, fallback mode) verified. ≥30% savings confirmed on realistic fixture data.
## Boundary Map
### S01 → S02
Produces:
- `gsd-db.ts``openDatabase()`, `closeDatabase()`, `initSchema()`, `migrateSchema()`, typed insert/query wrappers for decisions, requirements, artifacts tables
- `gsd-db.ts``isDbAvailable()` boolean, `getDbProvider()` provider name
- `gsd-db.ts``insertDecision()`, `insertRequirement()`, `insertArtifact()`, `upsertDecision()`, `upsertRequirement()`
- `gsd-db.ts``transaction()` wrapper for batch operations
- `context-store.ts``queryDecisions(opts?)`, `queryRequirements(opts?)`, `queryArtifact(path)`, `queryProject()`
- `context-store.ts``formatDecisionsForPrompt()`, `formatRequirementsForPrompt()`
- `types.ts``Decision`, `Requirement` interfaces
- Fallback: all query functions return empty when DB unavailable
Consumes:
- nothing (first slice)
### S01 → S03
Produces:
- Same as S01 → S02 (DB layer + query functions + formatters)
- `isDbAvailable()` for conditional DB vs markdown loading in prompt builders
Consumes:
- nothing (first slice)
### S01 → S05
Produces:
- `gsd-db.ts``copyWorktreeDb(srcPath, destPath)` — sync file copy
- `gsd-db.ts``reconcileWorktreeDb(mainDbPath, worktreeDbPath)` — ATTACH-based merge
- `openDatabase()` for opening DB at arbitrary paths
Consumes:
- nothing (first slice)
### S02 → S03
Produces:
- `md-importer.ts``migrateFromMarkdown(basePath)` — full project import function
- `md-importer.ts` → individual parsers for all artifact types
- Auto-migration detection and execution wired into `startAuto()`
Consumes from S01:
- `gsd-db.ts``openDatabase()`, typed insert wrappers, `transaction()`
- Schema tables for all artifact types
### S02 → S05
Produces:
- `md-importer.ts``migrateFromMarkdown()` for importing markdown into a fresh worktree DB
Consumes from S01:
- `gsd-db.ts` → database layer
### S03 → S04
Produces:
- All `build*Prompt()` functions rewired to use DB queries
- DB-aware inline helpers: `inlineDecisionsFromDb()`, `inlineRequirementsFromDb()`, `inlineProjectFromDb()`
- Dual-write re-import in `handleAgentEnd`
Consumes from S01:
- `context-store.ts` → query functions and formatters
- `gsd-db.ts``isDbAvailable()`
Consumes from S02:
- `md-importer.ts``migrateFromMarkdown()` for re-import after auto-commit
### S03 → S06
Produces:
- `context-store.ts` → complete query layer that structured tools can use
- Dual-write infrastructure (re-import pattern)
Consumes from S01:
- `gsd-db.ts` → typed upsert wrappers
### S04 → S07
Produces:
- Token measurement in `UnitMetrics` (`promptCharCount`, `baselineCharCount`)
- `deriveState()` DB-first content loading
- Measurement infrastructure in `dispatchNextUnit`
Consumes from S03:
- Rewired prompt builders
### S05 → S07
Produces:
- `copyWorktreeDb` wired into `createWorktree`
- `reconcileWorktreeDb` wired into merge paths
Consumes from S01:
- `gsd-db.ts``copyWorktreeDb()`, `reconcileWorktreeDb()`, `openDatabase()`
Consumes from S02:
- `md-importer.ts``migrateFromMarkdown()` for fallback import
### S06 → S07
Produces:
- 3 structured LLM tools registered: `gsd_save_decision`, `gsd_update_requirement`, `gsd_save_summary`
- `/gsd inspect` slash command with autocomplete
Consumes from S03:
- `context-store.ts` → query layer for inspect output
- Dual-write infrastructure for tool-triggered markdown regeneration
Consumes from S01:
- `gsd-db.ts``upsertDecision()`, `upsertRequirement()`, `insertArtifact()`
- `db-writer.ts``generateDecisionsMd()`, `generateRequirementsMd()`, DB-first write helpers

View file

@ -0,0 +1,27 @@
# Dynamic Model Discovery
## Overview
Runtime model discovery from provider APIs with caching, TUI management, and CLI flags.
## Components
1. **model-discovery.ts** — Provider adapters (OpenAI, Ollama, OpenRouter, Google) + static adapters
2. **discovery-cache.ts** — Disk cache at `{agentDir}/discovery-cache.json` with per-provider TTLs
3. **models-json-writer.ts** — Safe read-modify-write for `models.json` with file locking
4. **provider-manager.ts** — TUI component for provider management (`/provider` command)
5. **model-registry.ts** — Extended with `discoverModels()`, `getAllWithDiscovered()`, cache integration
6. **settings-manager.ts**`modelDiscovery` settings (enabled, providers, ttlMinutes, autoRefreshOnModelSelect)
7. **args.ts**`--discover`, `--add-provider`, `--base-url`, `--discover-models` CLI flags
8. **list-models.ts** — Rewritten with `[discovered]` badge support
9. **main.ts** — CLI handlers for new flags
10. **interactive-mode.ts**`/provider` command handler
11. **preferences.ts**`updatePreferencesModels()` and `validateModelId()` helpers
## TTL Strategy
- Ollama: 5 min (local, models change often)
- OpenAI / Google / OpenRouter: 1 hour
- Default: 24 hours
## Merge Rules
- Discovered models never override existing built-in or custom models
- Discovered models are appended to the registry with `[discovered]` badge
- Background discovery is opt-in via `modelDiscovery.enabled` setting

View file

@ -0,0 +1,282 @@
# Issue #524: Move Git Operations to Rust via git2 Crate
## Current State
- **git2** crate (v0.20) already a dependency with vendored libgit2
- **7 read-only** functions already native in `git.rs` + `native-git-bridge.ts`:
- `git_current_branch`, `git_main_branch`, `git_branch_exists`
- `git_has_merge_conflicts`, `git_working_tree_status`, `git_has_changes`
- `git_commit_count_between`
- **~73 execSync/execFileSync git calls** remain across 14 TypeScript files
- All native functions follow the same pattern: native-first with execSync fallback
## Scope
This plan covers **Phase 1**: migrate all remaining read operations and high-value
write operations to native git2. Push operations stay as execSync (credential
handling too complex for git2). The "Additional Rust Opportunities" (state
derivation, JSONL parser) are out of scope for this PR.
---
## Phase 1: New Native Read Functions (git.rs)
### 1.1 — `git_is_repo(path: String) -> bool`
Replaces: `git rev-parse --git-dir` (3 calls in auto.ts, guided-flow.ts, doctor.ts)
Implementation: `Repository::open(path).is_ok()`
### 1.2 — `git_has_staged_changes(repo_path: String) -> bool`
Replaces: `git diff --cached --stat` (2 calls in git-service.ts)
Implementation: Diff index vs HEAD tree, check if delta count > 0
### 1.3 — `git_diff_stat(repo_path, from_ref?, to_ref?) -> GitDiffStat`
Replaces: `git diff --stat HEAD`, `git diff --stat --cached HEAD` (session-forensics.ts)
Returns: `{ files_changed: u32, insertions: u32, deletions: u32, summary: String }`
Implementation: Diff between two trees/index/workdir, count deltas
### 1.4 — `git_diff_name_status(repo_path, from_ref, to_ref, pathspec?) -> Vec<GitNameStatus>`
Replaces: `git diff --name-status main...branch -- .gsd/` (worktree-manager.ts, 3 calls)
Returns: `Vec<{ status: String, path: String }>`
Implementation: Tree-to-tree diff with pathspec filter
### 1.5 — `git_diff_numstat(repo_path, from_ref, to_ref) -> Vec<GitNumstat>`
Replaces: `git diff --numstat main branch` (worktree-manager.ts, 1 call)
Returns: `Vec<{ added: u32, removed: u32, path: String }>`
### 1.6 — `git_diff_content(repo_path, from_ref, to_ref, pathspec?, exclude?) -> String`
Replaces: `git diff main...branch -- .gsd/` and `-- . :(exclude).gsd/` (worktree-manager.ts, 2 calls)
Returns: Unified diff string
### 1.7 — `git_log_oneline(repo_path, from_ref, to_ref) -> Vec<GitLogEntry>`
Replaces: `git log --oneline main..branch` (worktree-manager.ts, 1 call)
Returns: `Vec<{ sha: String, message: String }>`
### 1.8 — `git_worktree_list(repo_path) -> Vec<GitWorktreeEntry>`
Replaces: `git worktree list --porcelain` (worktree-manager.ts, 2 calls)
Returns: `Vec<{ path: String, branch: String, is_bare: bool }>`
Implementation: `Repository::worktrees()` + individual worktree info
### 1.9 — `git_branch_list(repo_path, pattern?) -> Vec<String>`
Replaces: `git branch --list milestone/*`, `git branch --list gsd/*` (doctor.ts, commands.ts, 3 calls)
Returns: Branch names matching pattern
### 1.10 — `git_branch_list_merged(repo_path, target, pattern?) -> Vec<String>`
Replaces: `git branch --merged main --list gsd/*` (commands.ts, 1 call)
Returns: Branch names merged into target
### 1.11 — `git_ls_files(repo_path, pathspec) -> Vec<String>`
Replaces: `git ls-files "<exclusion>"` (doctor.ts, 1 call)
Implementation: Read index, filter by pathspec
### 1.12 — `git_for_each_ref(repo_path, prefix) -> Vec<String>`
Replaces: `git for-each-ref refs/gsd/snapshots/ --format=%(refname)` (commands.ts, 1 call)
Implementation: `repo.references_glob(prefix/*)`
### 1.13 — `git_conflict_files(repo_path) -> Vec<String>`
Replaces: `git diff --name-only --diff-filter=U` (auto-worktree.ts, 1 call)
Implementation: Read index conflicts
### 1.14 — `git_batch_info(repo_path) -> GitBatchInfo`
NEW batch function: status + branch + diff summary in ONE call
Returns: `{ branch: String, has_changes: bool, status: String, staged_count: u32, unstaged_count: u32 }`
---
## Phase 2: New Native Write Functions (git.rs)
### 2.1 — `git_init(path, branch?) -> void`
Replaces: `git init -b <branch>` (auto.ts, guided-flow.ts, 2 calls)
Implementation: `Repository::init()` + set initial branch
### 2.2 — `git_add_all(repo_path) -> void`
Replaces: `git add -A` (auto-worktree.ts, git-service.ts, 4 calls)
Implementation: Add all to index via `repo.index().add_all()`
### 2.3 — `git_add_paths(repo_path, paths: Vec<String>) -> void`
Replaces: `git add -- <file>` (auto-worktree.ts, git-service.ts, 3 calls)
Implementation: Add specific paths to index
### 2.4 — `git_reset_paths(repo_path, paths: Vec<String>) -> void`
Replaces: `git reset HEAD -- <path>` (git-service.ts, in loop)
Implementation: Reset index entries to HEAD for specific paths
### 2.5 — `git_commit(repo_path, message, options?) -> String`
Replaces: `git commit -m <msg>`, `git commit --no-verify -F -` (11+ calls across files)
Returns: Commit SHA
Implementation: Write index as tree → create commit → update HEAD
Options: `{ allow_empty: bool }`
### 2.6 — `git_checkout_branch(repo_path, branch) -> void`
Replaces: `git checkout <branch>` (auto-worktree.ts, 1 call)
Implementation: Set HEAD + checkout tree
### 2.7 — `git_checkout_theirs(repo_path, paths: Vec<String>) -> void`
Replaces: `git checkout --theirs -- <file>` (auto-worktree.ts, in loop)
Implementation: Resolve index conflict with "theirs" strategy
### 2.8 — `git_merge_squash(repo_path, branch) -> GitMergeResult`
Replaces: `git merge --squash <branch>` (auto-worktree.ts, worktree-manager.ts, 3 calls)
Returns: `{ success: bool, conflicts: Vec<String> }`
Implementation: Find merge base → merge trees → apply to index
### 2.9 — `git_merge_abort(repo_path) -> void`
Replaces: `git merge --abort` (git-self-heal.ts, worktree-command.ts, 2 calls)
Implementation: Reset to ORIG_HEAD, clean merge state
### 2.10 — `git_rebase_abort(repo_path) -> void`
Replaces: `git rebase --abort` (git-self-heal.ts, 1 call)
### 2.11 — `git_reset_hard(repo_path) -> void`
Replaces: `git reset --hard HEAD` (git-self-heal.ts, 1 call)
Implementation: `repo.reset(HEAD, Hard)`
### 2.12 — `git_branch_delete(repo_path, branch, force: bool) -> void`
Replaces: `git branch -D/-d <branch>` (5 calls across files)
Implementation: `repo.find_branch().delete()`
### 2.13 — `git_branch_force_reset(repo_path, branch, target) -> void`
Replaces: `git branch -f <branch> <target>` (worktree-manager.ts, 1 call)
### 2.14 — `git_rm_cached(repo_path, paths: Vec<String>, recursive: bool) -> Vec<String>`
Replaces: `git rm --cached -r --ignore-unmatch` (git-service.ts, doctor.ts, gitignore.ts, 6 calls)
Returns: List of removed paths
### 2.15 — `git_rm_force(repo_path, paths: Vec<String>) -> void`
Replaces: `git rm --force -- <file>` (auto-worktree.ts, 1 call)
### 2.16 — `git_worktree_add(repo_path, path, branch, create_from?) -> void`
Replaces: `git worktree add` commands (worktree-manager.ts, 2 calls)
Implementation: `repo.worktree()` API
### 2.17 — `git_worktree_remove(repo_path, path, force: bool) -> void`
Replaces: `git worktree remove --force` (worktree-manager.ts, doctor.ts, 3 calls)
### 2.18 — `git_worktree_prune(repo_path) -> void`
Replaces: `git worktree prune` (worktree-manager.ts, 3 calls)
### 2.19 — `git_revert_commit(repo_path, sha, no_commit: bool) -> void`
Replaces: `git revert --no-commit <sha>` (undo.ts, 1 call)
### 2.20 — `git_revert_abort(repo_path) -> void`
Replaces: `git revert --abort` (undo.ts, 1 call)
### 2.21 — `git_update_ref(repo_path, refname, target?) -> void`
Replaces: `git update-ref <ref> HEAD` and `git update-ref -d <ref>` (git-service.ts, commands.ts, 2 calls)
When target is null/empty, deletes the ref.
---
## Phase 3: TypeScript Bridge Updates (native-git-bridge.ts)
Add bridge functions for ALL new native functions, each with:
1. Native-first implementation
2. execSync fallback for when native module unavailable
3. Proper error handling
4. Type definitions
---
## Phase 4: Consumer Migration
Update each TypeScript file to use native bridge functions:
### 4.1 — git-service.ts
- `smartStage()` → use `nativeAddAll()` + `nativeResetPaths()`
- `commit()` → use `nativeCommit()`
- `autoCommit()` → use `nativeHasStagedChanges()`
- `createSnapshot()` → use `nativeUpdateRef()`
- Runtime file cleanup → use `nativeRmCached()`
- `runPreMergeCheck()` → use `nativeReadFile()` or keep fs.readFileSync (not git)
### 4.2 — worktree-manager.ts
- `getMainBranch()` → use `nativeDetectMainBranch()` (already exists!)
- `createWorktree()` → use `nativeWorktreeAdd()`, `nativeBranchForceReset()`
- `listWorktrees()` → use `nativeWorktreeList()`
- `removeWorktree()` → use `nativeWorktreeRemove()`, `nativeWorktreePrune()`, `nativeBranchDelete()`
- `diffWorktreeGSD()` → use `nativeDiffNameStatus()`
- `diffWorktreeAll()` → use `nativeDiffNameStatus()`
- `diffWorktreeNumstat()` → use `nativeDiffNumstat()`
- `getWorktreeGSDDiff()` → use `nativeDiffContent()`
- `getWorktreeCodeDiff()` → use `nativeDiffContent()`
- `getWorktreeLog()` → use `nativeLogOneline()`
- `mergeWorktreeToMain()` → use `nativeMergeSquash()` + `nativeCommit()`
### 4.3 — auto-worktree.ts
- `getCurrentBranch()` → use `nativeGetCurrentBranch()` (already exists!)
- `autoCommitDirtyState()` → use `nativeWorkingTreeStatus()` + `nativeAddAll()` + `nativeCommit()`
- `mergeMilestoneToMain()` → use native merge, checkout, commit, branch delete
### 4.4 — auto.ts
- `git rev-parse --git-dir` → use `nativeIsRepo()`
- `git init -b` → use `nativeInit()`
- `git add -A .gsd .gitignore && git commit` → use `nativeAddPaths()` + `nativeCommit()`
### 4.5 — auto-supervisor.ts
- `detectWorkingTreeActivity()` → use `nativeHasChanges()` (already exists!)
### 4.6 — git-self-heal.ts
- `abortAndReset()` → use `nativeMergeAbort()` + `nativeRebaseAbort()` + `nativeResetHard()`
### 4.7 — guided-flow.ts
- Same pattern as auto.ts for init + bootstrap
### 4.8 — doctor.ts
- `git rev-parse --git-dir` → use `nativeIsRepo()`
- `git worktree remove --force` → use `nativeWorktreeRemove()`
- `git branch --list milestone/*` → use `nativeBranchList()`
- `git branch -D` → use `nativeBranchDelete()`
- `git ls-files` → use `nativeLsFiles()`
- `git rm --cached` → use `nativeRmCached()`
- `git branch --format...` → use `nativeBranchList()`
### 4.9 — gitignore.ts
- `untrackRuntimeFiles()` → use `nativeRmCached()`
### 4.10 — commands.ts
- `handleCleanupBranches()` → use `nativeBranchList()`, `nativeBranchListMerged()`, `nativeBranchDelete()`
- `handleCleanupSnapshots()` → use `nativeForEachRef()`, `nativeUpdateRef()`
### 4.11 — undo.ts
- `git revert --no-commit` → use `nativeRevertCommit()`
- `git revert --abort` → use `nativeRevertAbort()`
### 4.12 — session-forensics.ts
- `getGitChanges()` → use `nativeWorkingTreeStatus()` + `nativeDiffStat()`
### 4.13 — worktree-command.ts
- `git merge --abort` → use `nativeMergeAbort()`
---
## Kept as execSync (out of scope)
- `git push <remote> <branch>` — Credential handling too complex for git2
- `cat package.json` — Not a git command (already just fs.readFileSync)
- `npm test` / custom commands — Not git operations
---
## Implementation Order
1. **Rust functions** (git.rs) — all read functions first, then write functions
2. **TypeScript bridge** (native-git-bridge.ts) — add all new bridge functions
3. **Consumer migration** — update each .ts file to use bridge functions
4. **Remove dead code** — delete local `runGit()` helpers from files that no longer need them
5. **Testing** — build native module, run CI, verify all operations work
---
## Risk Mitigation
- Every native function has an execSync fallback in the bridge
- Write operations are tested by existing integration tests
- git2's vendored libgit2 matches git CLI behavior for standard operations
- The `loadNative()` pattern means if ANY native function crashes, ALL functions fall back to CLI
## Expected Impact
- **~70 execSync calls eliminated** when native module is available
- **Zero process spawns** for git operations in the common path
- **Batch operations** (git_batch_info) reduce 3-4 calls to 1
- **Type-safe errors** instead of parsing stderr strings
- **Consistent cross-platform** behavior via libgit2

View file

@ -0,0 +1,133 @@
# Native Performance Optimizations — deriveState, JSONL, Paths, Parsing
## Overview
Four native Rust optimizations to eliminate hot-path bottlenecks in GSD's dispatch cycle.
Building on the existing git2 migration and native parser infrastructure.
---
## 1. Native deriveState — Eliminate Frontmatter Re-serialization
### Problem
`state.ts:134-176` — When `nativeBatchParseGsdFiles()` returns parsed files, the JS
side re-serializes frontmatter back into YAML strings so downstream parsers can re-parse
them. This is a round-trip waste: Rust parses → JS re-serializes → JS re-parses.
### Solution
The native batch parser already returns `{ metadata: JSON, body, sections }`.
Instead of re-serializing frontmatter to YAML in JS, modify `cachedLoadFile()` to
return the raw body directly, and update downstream parsers to accept pre-parsed
metadata. This eliminates the entire re-serialization loop at lines 143-172.
However, the parsers (`parseRoadmap`, `parseSummary`, `parsePlan`, etc.) all expect
raw markdown strings with frontmatter. Changing their signatures would be a massive
refactor. Instead:
**Approach: Make Rust return the original file content alongside parsed data.**
Add a new field `rawContent: String` to `ParsedGsdFile` that contains the complete
original file content. The JS batch cache stores this directly, eliminating the
re-serialization entirely. Downstream parsers get exactly what `loadFile()` would return.
### Implementation
- **Rust** (`gsd_parser.rs`): Add `raw_content` field to `ParsedGsdFile`, populate with
the original file content read from disk.
- **TS** (`native-parser-bridge.ts`): Expose `rawContent` in `BatchParsedFile`.
- **TS** (`state.ts`): Replace the 30-line re-serialization loop with
`fileContentCache.set(absPath, f.rawContent)`.
### Impact
Eliminates ~30 lines of JS string building per dispatch. Removes JSON.parse of metadata
that was only used to re-serialize back to YAML.
---
## 2. Native JSONL Streaming Parser
### Problem
`session-forensics.ts:68-78` — Parses JSONL by `split("\n").map(JSON.parse)` with a
10MB cap. Large session files cause OOM or slowness.
### Solution
Add a Rust JSONL parser that streams through the file with constant memory, returning
structured data. Uses `serde_json` for parsing and handles arbitrary file sizes.
### Implementation
- **Rust** (`gsd_parser.rs`): Add `parse_jsonl_tail(path, max_entries?)` function that:
1. Memory-maps or streams the file from the tail
2. Parses each line as JSON
3. Returns the last N entries as a JSON array string
- **TS** (`native-parser-bridge.ts`): Add bridge function.
- **TS** (`session-forensics.ts`): Use native parser, fall back to JS implementation.
### Impact
Handles arbitrary file sizes. 3-5x faster parsing on 10MB files.
---
## 3. Native Directory Tree Index
### Problem
`paths.ts:20-34``cachedReaddirSync()` caches per-directory, but caches are
cleared every dispatch via `invalidateAllCaches()`. Each `resolveMilestoneFile`,
`resolveSliceFile`, `resolveTaskFile` triggers separate directory reads.
### Solution
Add a Rust function that walks the entire `.gsd/` tree once and returns a flat
file listing. The JS side builds a Map from this, making all path resolution O(1)
lookups instead of repeated `readdirSync` + regex matching.
### Implementation
- **Rust** (`gsd_parser.rs`): The `batchParseGsdFiles` already walks the tree.
Add `scan_gsd_tree(directory)` that returns `Vec<{ path, isDir, name }>` for
ALL entries (not just .md files).
- **TS** (`native-parser-bridge.ts`): Add bridge function.
- **TS** (`paths.ts`): Add native tree cache. On first access, call native scan
and build lookup maps. `clearPathCache()` clears the native cache too.
### Impact
Eliminates 20-50 `readdirSync` calls per dispatch. Makes `resolveDir`/`resolveFile`
O(1) lookups.
---
## 4. Expand Native Markdown Parsing
### Problem
`files.ts` parsers (`parsePlan`, `parseSummary`, `parseContinue`) still use JS regex.
Each runs ~10-20 regex patterns per file. Only `parseRoadmap` has a native implementation.
### Solution
Add native Rust implementations for `parsePlan` and `parseSummary` — the two parsers
called most frequently during `deriveState`. `parseContinue` is called infrequently
and can stay in JS.
### Implementation
- **Rust** (`gsd_parser.rs`): Add `parse_plan_file(content)` and `parse_summary_file(content)`.
- **TS** (`native-parser-bridge.ts`): Add bridge functions with JS fallback.
- **TS** (`files.ts`): Call native versions first, fall back to JS.
### Impact
3-5x faster parsing per file. With ~20 files per deriveState, saves 20-40ms.
---
## Implementation Order
1. **deriveState raw content** (smallest change, biggest immediate impact)
2. **Directory tree index** (eliminates readdirSync overhead)
3. **JSONL streaming parser** (helps crash recovery path)
4. **Plan/Summary native parsers** (improves parsing throughput)
## Files Modified
### Rust
- `native/crates/engine/src/gsd_parser.rs` — new functions + rawContent field
### TypeScript
- `src/resources/extensions/gsd/native-parser-bridge.ts` — new bridge functions
- `src/resources/extensions/gsd/state.ts` — simplified batch cache
- `src/resources/extensions/gsd/paths.ts` — native tree cache
- `src/resources/extensions/gsd/session-forensics.ts` — native JSONL
- `src/resources/extensions/gsd/files.ts` — native plan/summary parsers

View file

@ -0,0 +1,49 @@
# Preferences Wizard Completeness
## Problem
The `/gsd prefs wizard` currently only configures 5 of 18+ preference fields. Users must hand-edit YAML for the rest.
## Current Wizard Coverage
1. Models (per phase) ✓
2. Auto-supervisor timeouts ✓
3. Git main_branch ✓
4. Skill discovery mode ✓
5. Unique milestone IDs ✓
## Missing Fields to Add
### Group 1: Git Settings (expand existing section)
- `auto_push` (boolean) — auto-push commits ✓
- `push_branches` (boolean) — push milestone branches ✓
- `remote` (string) — git remote name ✓
- `snapshots` (boolean) — WIP snapshot commits ✓
- `pre_merge_check` (boolean | "auto") — pre-merge validation ✓
- `commit_type` (select) — conventional commit prefix ✓
- `merge_strategy` (select) — squash vs merge ✓
- `isolation` (select) — worktree vs branch ✓
### Group 2: Budget & Cost Control ✓
- `budget_ceiling` (number) — dollar limit
- `budget_enforcement` (select: warn/pause/halt)
- `context_pause_threshold` (number 0-100)
### Group 3: Notifications ✓
- `notifications.enabled` (boolean)
- `notifications.on_complete` (boolean)
- `notifications.on_error` (boolean)
- `notifications.on_budget` (boolean)
- `notifications.on_milestone` (boolean)
- `notifications.on_attention` (boolean)
### Group 4: Behavior Toggles ✓
- `uat_dispatch` (boolean)
### Group 5: Update Serialization Order ✓
- Added missing keys to `orderedKeys` in `serializePreferencesToFrontmatter()`
### Group 6: Update Template & Docs ✓
- Updated `templates/preferences.md` with new fields
- Updated `docs/preferences-reference.md` with budget, notifications, git, hooks
### Group 7: Tests ✓
- Added `preferences-wizard-fields.test.ts` covering all new fields

View file

@ -6,6 +6,137 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
## [Unreleased]
## [2.17.0] - 2026-03-15
### Added
- **Token optimization profiles** — `budget`, `balanced`, and `quality` presets that coordinate model selection, phase skipping, and context compression to reduce token usage by 40-60% in budget mode
- **Complexity-based task routing** — automatically classifies tasks as simple/standard/complex and routes to appropriate models, with persistent learning from routing history
- **`git.commit_docs` preference** — set to `false` to keep `.gsd/` planning artifacts local-only, useful for teams where only some members use GSD
### Changed
- Updated Ollama cloud provider model catalog
### Fixed
- Native binary hangs in GSD auto-mode paths (#453)
- Auto-mode can be stopped from a different terminal (#586)
- Parse cache collision causing false loop detection on `complete-slice` (#583)
- Exhaustive switch handling and cleanup in Google provider (#587)
## [2.16.0] - 2026-03-15
### Added
- `/gsd steer` command — hard-steer plan documents during execution without stopping the pipeline
- Native git operations via libgit2 — ~70 fewer process spawns per dispatch cycle
- Native performance optimizations for `deriveState`, JSONL parsing, and path resolution
- Default model upgraded to Opus 4.6 with 1M context variant
- PR template and bug report issue template
### Fixed
- Auto-mode continues after guided milestone planning instead of stalling at "Milestone planned"
- Git commands no longer fail when repo path contains spaces
- Arrow key cursor updates and Shift+Enter newline insertion in TUI
- Tool API keys loaded from `auth.json` at session startup
- TypeScript errors resolved across extension, test, and async-jobs files
### Changed
- Hot-path lookup caching and error resilience optimizations
- Extension type-checking added to CI pipeline
## [2.15.1] - 2026-03-15
### Fixed
- Auto-mode worktree path resolution — prompt templates now include working directory, preventing artifacts from being written to the wrong location and causing infinite re-dispatches
- Auto-mode resource sync detection — gracefully stops when resources change mid-session instead of crashing
- Auto-mode missing import for `resolveSkillDiscoveryMode` causing crash on startup
- Auto-mode recovery hardened — checkbox verification falls through correctly, corrupt roadmaps fail verification instead of silently passing, atomic writes for completed-units.json, and task completion verified via artifacts not just file existence
- Auto-mode progress widget now refreshes from disk every 5 seconds during unit execution instead of appearing frozen
- Undo command now invalidates all caches (not just state cache), preventing stale results after undoing completed tasks
### Changed
- CI pipeline supports prerelease publishing with `--tag next` for testing before stable release
### Added
- Unit tests for auto-dashboard, auto-recovery, and crash-recovery modules (46 new tests)
## [2.15.0] - 2026-03-15
### Added
- **8 new commands**: budget enforcement, notifications, and quality-of-life improvements (#441)
- **Preferences schema validation**: detects unknown/typo'd preference keys and surfaces warnings instead of silently ignoring them (#542)
- **Pipeline-aware prompts**: each agent phase (research, plan, execute, complete) now knows its role in the pipeline, eliminating redundant code exploration between phases (#543)
- **Research depth calibration**: three-tier system (deep/targeted/light) so agents match effort to actual complexity (#543)
### Changed
- Auto-mode decomposed into focused modules for maintainability (#534)
- Dispatch logic extracted from if-else chain to dispatch table (#539)
- v1 migration code gated behind dynamic import — only loaded when needed (#541)
- Background shell module decomposed into focused modules
- Unified cache invalidation into single `invalidateAllCaches()` function (#545)
### Fixed
- Executor agents now receive explicit working directory, preventing writes to main repo instead of worktree (#543)
- Merge loop and .gsd/ conflict auto-resolution in worktree model, `git.isolation` preference restored (#536)
- Arrow keys no longer insert escape sequences as text during LLM streaming (#493)
- YAML preferences parser hardened for OpenRouter model IDs with special characters (#488)
- `@` file autocomplete debounced to prevent TUI freeze on large codebases (#448)
- Auto-mode stops cleanly when dispatch gap watchdog fails (#537)
- Synchronous I/O removed from hot paths (#540)
- Silent catch blocks now capture error references for crash diagnostics (#546)
- `ctx.log` error in GSD provider recovery path fixed
- TUI resource leaks patched in loader, cancellable-loader, input, and editor components (#482)
- Hardcoded ANSI escapes replaced with chalk for consistent terminal handling (#482)
## [2.14.4] - 2026-03-15
### Fixed
- **Session cwd update** — `newSession()` now updates the LLM's perceived working directory to reflect `process.chdir()` into auto-worktrees. Previously the system prompt was frozen at the original project root, causing the LLM to `cd` back and write files to the wrong location. This was the root cause of complete-slice and plan-slice loops in worktree-based projects.
## [2.14.3] - 2026-03-15
### Fixed
- **Copy planning artifacts into new auto-worktrees** — `createAutoWorktree` now copies `.gsd/milestones/`, `DECISIONS.md`, `REQUIREMENTS.md`, `PROJECT.md` from the source repo into the worktree. Prevents plan-slice loops in projects with pre-v2.14.0 `.gitignore`.
## [2.14.2] - 2026-03-15
### Fixed
- **Dispatch reentrancy deadlock** — `_dispatching` flag was never reset after first dispatch, permanently blocking all subsequent unit dispatches. Wrapped in try/finally.
- **`.gitignore` self-heal** — existing projects with blanket `.gsd/` ignore now auto-remove it on next auto-mode start, replacing with explicit runtime-only patterns so planning artifacts are tracked in git.
- **Discuss depth verification** — render summary as chat text (markdown renders), use ask_user_questions for short confirmation only.
## [2.14.1] - 2026-03-15
### Fixed
- **Quiet auto-mode warnings** — internal recovery machinery (dispatch gap watchdog, model fallback chain) downgraded to verbose-only. Users only see warnings when action is needed.
- **Dispatch recovery hardening** — artifact fallback when completion key missing, TUI freeze prevention, reentrancy guard, atomic writes, stale runtime record cleanup
## [2.14.0] - 2026-03-15
### Added
- **Discussion manifest** — mechanical process verification for multi-milestone context discussions
- **Session-internal `/gsd config`** — configure GSD settings within a running session
- **Model selection UI** — select list instead of free-text input for model preferences
- **Startup performance** — faster GSD launch via optimized initialization
### Changed
- **Branchless worktree architecture** — eliminated slice branches entirely. All work commits sequentially on `milestone/<MID>` within auto-mode worktrees. No branch creation, switching, or merging within a worktree. ~2600 lines of merge/conflict/branch-switching code removed.
- **`.gitignore` overhaul** — planning artifacts (`.gsd/milestones/`) are tracked in git naturally. Only runtime files are gitignored. No more force-add hacks.
- **Multi-milestone enforcement** — `depends_on` frontmatter enforced in multi-milestone CONTEXT.md
### Fixed
- **Auto-mode loop detection failures** — artifacts on wrong branch or invisible after branch switch no longer possible (root cause eliminated by branchless architecture)
- **Nested worktree creation** — auto-mode no longer creates worktrees inside existing manual worktrees, preventing wrong-repo state reads and "All milestones complete" false positives
- **Dispatch recovery hardening** — artifact fallback when completion key missing, TUI freeze prevention on cascading skips, reentrancy guard, atomic writes, stale runtime record cleanup, git index.lock cleanup
- **Hook orchestration** — finalize runtime records, add supervision, fix retry
- **Empty slice plan stays in planning** — no longer incorrectly transitions to summarizing
- **Prefs wizard** — launch directly from `/gsd prefs`, fix parse/serialize cycle for empty arrays
- **Discussion routing** — `/gsd discuss` routes to draft when phase is needs-discussion
### Removed
- `ensureSliceBranch()`, `switchToMain()`, `mergeSliceToMain()`, `mergeSliceToMilestone()`
- `shouldUseWorktreeIsolation()`, `getMergeToMainMode()`, `buildFixMergePrompt()`
- `withMergeHeal()`, `recoverCheckout()`, `fix-merge` unit type
- `git.isolation` and `git.merge_to_main` preferences (deprecated with warnings)
## [2.13.1] - 2026-03-15
### Fixed
@ -607,7 +738,16 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
### Changed
- License updated to MIT
[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.13.1...HEAD
[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.17.0...HEAD
[2.17.0]: https://github.com/gsd-build/gsd-2/compare/v2.16.0...v2.17.0
[2.16.0]: https://github.com/gsd-build/gsd-2/compare/v2.15.1...v2.16.0
[2.15.1]: https://github.com/gsd-build/gsd-2/releases/tag/v2.15.1
[2.15.0]: https://github.com/gsd-build/gsd-2/compare/v2.14.4...v2.15.0
[2.14.4]: https://github.com/gsd-build/gsd-2/compare/v2.14.3...v2.14.4
[2.14.3]: https://github.com/gsd-build/gsd-2/compare/v2.14.2...v2.14.3
[2.14.2]: https://github.com/gsd-build/gsd-2/compare/v2.14.1...v2.14.2
[2.14.1]: https://github.com/gsd-build/gsd-2/compare/v2.14.0...v2.14.1
[2.14.0]: https://github.com/gsd-build/gsd-2/compare/v2.13.1...v2.14.0
[2.13.1]: https://github.com/gsd-build/gsd-2/compare/v2.13.0...v2.13.1
[2.13.0]: https://github.com/gsd-build/gsd-2/compare/v2.12.0...v2.13.0
[2.12.0]: https://github.com/gsd-build/gsd-2/compare/v2.11.1...v2.12.0

View file

@ -21,6 +21,25 @@ One command. Walk away. Come back to a built project with clean git history.
---
## Documentation
Full documentation is available in the [`docs/`](./docs/) directory:
- **[Getting Started](./docs/getting-started.md)** — install, first run, basic usage
- **[Auto Mode](./docs/auto-mode.md)** — autonomous execution deep-dive
- **[Configuration](./docs/configuration.md)** — all preferences, models, git, and hooks
- **[Token Optimization](./docs/token-optimization.md)** — profiles, context compression, complexity routing (v2.17)
- **[Cost Management](./docs/cost-management.md)** — budgets, tracking, projections
- **[Git Strategy](./docs/git-strategy.md)** — worktree isolation, branching, merge behavior
- **[Working in Teams](./docs/working-in-teams.md)** — unique IDs, shared artifacts
- **[Skills](./docs/skills.md)** — bundled skills, discovery, custom authoring
- **[Commands Reference](./docs/commands.md)** — all commands and keyboard shortcuts
- **[Architecture](./docs/architecture.md)** — system design and dispatch pipeline
- **[Troubleshooting](./docs/troubleshooting.md)** — common issues, doctor, recovery
- **[Migration from v1](./docs/migration.md)** — `.planning` → `.gsd` migration
---
## What Changed From v1
The original GSD was a collection of markdown prompts installed into `~/.claude/commands/`. It relied entirely on the LLM reading those prompts and doing the right thing. That worked surprisingly well — but it had hard limits:
@ -38,7 +57,7 @@ GSD v2 solves all of these because it's not a prompt framework anymore — it's
| Context management | Hope the LLM doesn't fill up | Fresh session per task, programmatic |
| Auto mode | LLM self-loop | State machine reading `.gsd/` files |
| Crash recovery | None | Lock files + session forensics |
| Git strategy | LLM writes git commands | Programmatic branch-per-slice, squash merge |
| Git strategy | LLM writes git commands | Worktree isolation, sequential commits, squash merge |
| Cost tracking | None | Per-unit token/cost ledger with dashboard |
| Stuck detection | None | Retry once, then stop with diagnostics |
| Timeout supervision | None | Soft/idle/hard timeouts with recovery steering |
@ -111,7 +130,7 @@ Auto mode is a state machine driven by files on disk. It reads `.gsd/STATE.md`,
2. **Context pre-loading** — The dispatch prompt includes inlined task plans, slice plans, prior task summaries, dependency summaries, roadmap excerpts, and decisions register. The LLM starts with everything it needs instead of spending tool calls reading files.
3. **Git branch-per-slice** — Each slice gets its own branch (`gsd/M001/S01`). Tasks commit atomically on the branch. When the slice completes, it's squash-merged to main (or whichever branch you started from) as one clean commit.
3. **Git worktree isolation** — Each milestone runs in its own git worktree with a `milestone/<MID>` branch. All slice work commits sequentially — no branch switching, no merge conflicts. When the milestone completes, it's squash-merged to main as one clean commit.
4. **Crash recovery** — A lock file tracks the current unit. If the session dies, the next `/gsd auto` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context.
@ -213,6 +232,7 @@ On first run, GSD launches a branded setup wizard that walks you through LLM pro
| `/gsd next` | Explicit step mode (same as bare `/gsd`) |
| `/gsd auto` | Autonomous mode — researches, plans, executes, commits, repeats |
| `/gsd stop` | Stop auto mode gracefully |
| `/gsd steer` | Hard-steer plan documents during execution |
| `/gsd discuss` | Discuss architecture and decisions (works alongside auto mode) |
| `/gsd status` | Progress dashboard |
| `/gsd queue` | Queue future milestones (safe during auto mode) |
@ -268,7 +288,7 @@ gsd/M001/S01 (deleted after merge):
feat(S01/T01): core types and interfaces
```
One commit per slice on main (or whichever branch you started from). Squash commits are the permanent record — branches are deleted after merge. Git bisect works. Individual slices are revertable.
One squash commit per milestone on main (or whichever branch you started from). The worktree is torn down after merge. Git bisect works. Individual milestones are revertable.
### Verification
@ -333,6 +353,26 @@ unique_milestone_ids: true
| `skill_rules` | Situational rules for skill routing |
| `unique_milestone_ids` | Uses unique milestone names to avoid clashes when working in teams |
### Token Optimization (v2.17)
GSD 2.17 introduced a coordinated token optimization system that reduces usage by 40-60% on cost-sensitive workloads. Set a single preference to coordinate model selection, phase skipping, and context compression:
```yaml
token_profile: budget # or balanced (default), quality
```
| Profile | Savings | What It Does |
|---------|---------|-------------|
| `budget` | 40-60% | Cheap models, skip research/reassess, minimal context inlining |
| `balanced` | 10-20% | Default models, skip slice research, standard context |
| `quality` | 0% | All phases, all context, full model power |
**Complexity-based routing** automatically classifies tasks as simple/standard/complex and routes to appropriate models. Simple docs tasks get Haiku; complex architectural work gets Opus. The classification is heuristic (sub-millisecond, no LLM calls) and learns from outcomes via a persistent routing history.
**Budget pressure** graduates model downgrading as you approach your budget ceiling — 50%, 75%, and 90% thresholds progressively shift work to cheaper tiers.
See the full [Token Optimization Guide](./docs/token-optimization.md) for details.
### Bundled Tools
GSD ships with 14 extensions, all loaded automatically:

44
docs/README.md Normal file
View file

@ -0,0 +1,44 @@
# GSD Documentation
Welcome to the GSD documentation. This covers everything from getting started to advanced configuration, auto-mode internals, and extending GSD with the Pi SDK.
## User Documentation
| Guide | Description |
|-------|-------------|
| [Getting Started](./getting-started.md) | Installation, first run, and basic usage |
| [Auto Mode](./auto-mode.md) | How autonomous execution works — the state machine, crash recovery, and steering |
| [Commands Reference](./commands.md) | All commands, keyboard shortcuts, and CLI flags |
| [Configuration](./configuration.md) | Preferences, model selection, git settings, and token profiles |
| [Token Optimization](./token-optimization.md) | Token profiles, context compression, complexity routing, and adaptive learning (v2.17) |
| [Cost Management](./cost-management.md) | Budget ceilings, cost tracking, projections, and enforcement modes |
| [Git Strategy](./git-strategy.md) | Worktree isolation, branching model, and merge behavior |
| [Working in Teams](./working-in-teams.md) | Unique milestone IDs, `.gitignore` setup, and shared planning artifacts |
| [Skills](./skills.md) | Bundled skills, skill discovery, and custom skill authoring |
| [Migration from v1](./migration.md) | Migrating `.planning` directories from the original GSD |
| [Troubleshooting](./troubleshooting.md) | Common issues, `/gsd doctor`, and recovery procedures |
## Architecture & Internals
| Guide | Description |
|-------|-------------|
| [Architecture Overview](./architecture.md) | System design, extension model, state-on-disk, and dispatch pipeline |
| [Native Engine](../native/README.md) | Rust N-API modules for performance-critical operations |
| [ADR-001: Branchless Worktree Architecture](./ADR-001-branchless-worktree-architecture.md) | Decision record for the v2.14 git architecture |
## Pi SDK Documentation
These guides cover the underlying Pi SDK that GSD is built on. Useful if you want to extend GSD or build your own agent application.
| Guide | Description |
|-------|-------------|
| [What is Pi](./what-is-pi/README.md) | Core concepts — modes, agent loop, sessions, tools, providers |
| [Extending Pi](./extending-pi/README.md) | Building extensions — tools, commands, UI, events, state |
| [Context & Hooks](./context-and-hooks/README.md) | Context pipeline, hook reference, inter-extension communication |
| [Pi UI / TUI](./pi-ui-tui/README.md) | Terminal UI components, theming, keyboard input, rendering |
## Research
| Guide | Description |
|-------|-------------|
| [Building Coding Agents](./building-coding-agents/README.md) | Research notes on agent design — decomposition, context engineering, cost/quality tradeoffs |

108
docs/architecture.md Normal file
View file

@ -0,0 +1,108 @@
# Architecture Overview
GSD is a TypeScript application built on the [Pi SDK](https://github.com/badlogic/pi-mono). It embeds the Pi coding agent and extends it with the GSD workflow engine, auto mode state machine, and project management primitives.
## System Structure
```
gsd (CLI binary)
└─ loader.ts Sets PI_PACKAGE_DIR, GSD env vars, dynamic-imports cli.ts
└─ cli.ts Wires SDK managers, loads extensions, starts InteractiveMode
├─ onboarding.ts First-run setup wizard (LLM provider + tool keys)
├─ wizard.ts Env hydration from stored auth.json credentials
├─ app-paths.ts ~/.gsd/agent/, ~/.gsd/sessions/, auth.json
├─ resource-loader.ts Syncs bundled extensions + agents to ~/.gsd/agent/
└─ src/resources/
├─ extensions/gsd/ Core GSD extension
├─ extensions/... 12 supporting extensions
├─ agents/ scout, researcher, worker
├─ AGENTS.md Agent routing instructions
└─ GSD-WORKFLOW.md Manual bootstrap protocol
```
## Key Design Decisions
### State Lives on Disk
`.gsd/` is the sole source of truth. Auto mode reads it, writes it, and advances based on what it finds. No in-memory state survives across sessions. This enables crash recovery, multi-terminal steering, and session resumption.
### Two-File Loader Pattern
`loader.ts` sets all environment variables with zero SDK imports, then dynamically imports `cli.ts` which does static SDK imports. This ensures `PI_PACKAGE_DIR` is set before any SDK code evaluates.
### `pkg/` Shim Directory
`PI_PACKAGE_DIR` points to `pkg/` (not project root) to avoid Pi's theme resolution colliding with GSD's `src/` directory. Contains only `piConfig` and theme assets.
### Always-Overwrite Sync
Bundled extensions and agents are synced to `~/.gsd/agent/` on every launch, not just first run. This means `npm update -g` takes effect immediately.
### Fresh Session Per Unit
Every dispatch creates a new agent session. The LLM starts with a clean context window containing only the pre-inlined artifacts it needs. This prevents quality degradation from context accumulation.
## Bundled Extensions
| Extension | What It Provides |
|-----------|-----------------|
| **GSD** | Core workflow engine — auto mode, state machine, commands, dashboard |
| **Browser Tools** | Playwright-based browser with form intelligence and semantic actions |
| **Search the Web** | Brave Search, Tavily, or Jina page extraction |
| **Google Search** | Gemini-powered web search with AI-synthesized answers |
| **Context7** | Up-to-date library/framework documentation |
| **Background Shell** | Long-running process management with readiness detection |
| **Subagent** | Delegated tasks with isolated context windows |
| **Mac Tools** | macOS native app automation via Accessibility APIs |
| **MCPorter** | Lazy on-demand MCP server integration |
| **Voice** | Real-time speech-to-text (macOS, Linux) |
| **Slash Commands** | Custom command creation |
| **LSP** | Language Server Protocol — diagnostics, definitions, references, hover, rename |
| **Ask User Questions** | Structured user input with single/multi-select |
| **Secure Env Collect** | Masked secret collection |
## Bundled Agents
| Agent | Role |
|-------|------|
| **Scout** | Fast codebase recon — compressed context for handoff |
| **Researcher** | Web research — finds and synthesizes current information |
| **Worker** | General-purpose execution in an isolated context window |
## Native Engine
Performance-critical operations use a Rust N-API engine:
- **grep** — ripgrep-backed content search
- **glob** — gitignore-aware file discovery
- **ps** — cross-platform process tree management
- **highlight** — syntect-based syntax highlighting
- **ast** — structural code search via ast-grep
- **diff** — fuzzy text matching and unified diff generation
- **text** — ANSI-aware text measurement and wrapping
- **html** — HTML-to-Markdown conversion
- **image** — decode, encode, resize images
- **fd** — fuzzy file path discovery
- **clipboard** — native clipboard access
- **git** — libgit2-backed git read operations (v2.16+)
- **parser** — GSD file parsing and frontmatter extraction
## Dispatch Pipeline
The auto mode dispatch pipeline:
```
1. Read disk state (STATE.md, roadmap, plans)
2. Determine next unit type and ID
3. Classify complexity → select model tier
4. Apply budget pressure adjustments
5. Check routing history for adaptive adjustments
6. Resolve effective model (with fallbacks)
7. Build dispatch prompt (applying inline level compression)
8. Create fresh agent session
9. Inject prompt and let LLM execute
10. On completion: snapshot metrics, verify artifacts, persist state
11. Loop to step 1
```
Phase skipping (from token profile) gates steps 2-3: if a phase is skipped, the corresponding unit type is never dispatched.

143
docs/auto-mode.md Normal file
View file

@ -0,0 +1,143 @@
# Auto Mode
Auto mode is GSD's autonomous execution engine. Run `/gsd auto`, walk away, come back to built software with clean git history.
## How It Works
Auto mode is a **state machine driven by files on disk**. It reads `.gsd/STATE.md`, determines the next unit of work, creates a fresh agent session, injects a focused prompt with all relevant context pre-inlined, and lets the LLM execute. When the LLM finishes, auto mode reads disk state again and dispatches the next unit.
### The Loop
Each slice flows through phases automatically:
```
Research → Plan → Execute (per task) → Complete → Reassess Roadmap → Next Slice
```
- **Research** — scouts the codebase and relevant docs
- **Plan** — decomposes the slice into tasks with must-haves
- **Execute** — runs each task in a fresh context window
- **Complete** — writes summary, UAT script, marks roadmap, commits
- **Reassess** — checks if the roadmap still makes sense
## Key Properties
### Fresh Session Per Unit
Every task, research phase, and planning step gets a clean context window. No accumulated garbage. No degraded quality from context bloat. The dispatch prompt includes everything needed — task plans, prior summaries, dependency context, decisions register — so the LLM starts oriented instead of spending tool calls reading files.
### Context Pre-Loading
The dispatch prompt is carefully constructed with:
| Inlined Artifact | Purpose |
|------------------|---------|
| Task plan | What to build |
| Slice plan | Where this task fits |
| Prior task summaries | What's already done |
| Dependency summaries | Cross-slice context |
| Roadmap excerpt | Overall direction |
| Decisions register | Architectural context |
The amount of context inlined is controlled by your [token profile](./token-optimization.md). Budget mode inlines minimal context; quality mode inlines everything.
### Git Worktree Isolation
Each milestone runs in its own git worktree with a `milestone/<MID>` branch. All slice work commits sequentially — no branch switching, no merge conflicts mid-milestone. When the milestone completes, it's squash-merged to main as one clean commit.
See [Git Strategy](./git-strategy.md) for details.
### Crash Recovery
A lock file tracks the current unit. If the session dies, the next `/gsd auto` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context.
### Stuck Detection
If the same unit dispatches twice (the LLM didn't produce the expected artifact), GSD retries once with a deep diagnostic prompt. If it fails again, auto mode stops with the exact file it expected, so you can intervene.
### Timeout Supervision
Three timeout tiers prevent runaway sessions:
| Timeout | Default | Behavior |
|---------|---------|----------|
| Soft | 20 min | Warns the LLM to wrap up |
| Idle | 10 min | Detects stalls, intervenes |
| Hard | 30 min | Pauses auto mode |
Recovery steering nudges the LLM to finish durable output before timing out. Configure in preferences:
```yaml
auto_supervisor:
soft_timeout_minutes: 20
idle_timeout_minutes: 10
hard_timeout_minutes: 30
```
### Cost Tracking
Every unit's token usage and cost is captured, broken down by phase, slice, and model. The dashboard shows running totals and projections. Budget ceilings can pause auto mode before overspending.
See [Cost Management](./cost-management.md).
### Adaptive Replanning
After each slice completes, the roadmap is reassessed. If the work revealed new information that changes the plan, slices are reordered, added, or removed before continuing. This step is skipped with the `budget` token profile.
## Controlling Auto Mode
### Start
```
/gsd auto
```
### Pause
Press **Escape**. The conversation is preserved. You can interact with the agent, inspect state, or resume.
### Resume
```
/gsd auto
```
Auto mode reads disk state and picks up where it left off.
### Stop
```
/gsd stop
```
Stops auto mode gracefully. Can be run from a different terminal.
### Steer
```
/gsd steer
```
Hard-steer plan documents during execution without stopping the pipeline. Changes are picked up at the next phase boundary.
## Dashboard
`Ctrl+Alt+G` or `/gsd status` shows real-time progress:
- Current milestone, slice, and task
- Auto mode elapsed time and phase
- Per-unit cost and token breakdown
- Cost projections
- Completed and in-progress units
## Phase Skipping
Token profiles can skip certain phases to reduce cost:
| Phase | `budget` | `balanced` | `quality` |
|-------|----------|------------|-----------|
| Milestone Research | Skipped | Runs | Runs |
| Slice Research | Skipped | Skipped | Runs |
| Reassess Roadmap | Skipped | Runs | Runs |
See [Token Optimization](./token-optimization.md) for details.

54
docs/commands.md Normal file
View file

@ -0,0 +1,54 @@
# Commands Reference
## Session Commands
| Command | Description |
|---------|-------------|
| `/gsd` | Step mode — execute one unit at a time, pause between each |
| `/gsd next` | Explicit step mode (same as `/gsd`) |
| `/gsd auto` | Autonomous mode — research, plan, execute, commit, repeat |
| `/gsd stop` | Stop auto mode gracefully |
| `/gsd steer` | Hard-steer plan documents during execution |
| `/gsd discuss` | Discuss architecture and decisions (works alongside auto mode) |
| `/gsd status` | Progress dashboard |
| `/gsd queue` | Queue future milestones (safe during auto mode) |
| `/gsd prefs` | Model selection, timeouts, budget ceiling |
| `/gsd migrate` | Migrate a v1 `.planning` directory to `.gsd` format |
| `/gsd doctor` | Validate `.gsd/` integrity, find and fix issues |
## Git Commands
| Command | Description |
|---------|-------------|
| `/worktree` (`/wt`) | Git worktree lifecycle — create, switch, merge, remove |
## Session Management
| Command | Description |
|---------|-------------|
| `/clear` | Start a new session (alias for `/new`) |
| `/exit` | Graceful shutdown — saves session state before exiting |
| `/kill` | Kill GSD process immediately |
| `/model` | Switch the active model |
| `/login` | Log in to an LLM provider |
| `/thinking` | Toggle thinking level during sessions |
| `/voice` | Toggle real-time speech-to-text (macOS, Linux) |
## Keyboard Shortcuts
| Shortcut | Action |
|----------|--------|
| `Ctrl+Alt+G` | Toggle dashboard overlay |
| `Ctrl+Alt+V` | Toggle voice transcription |
| `Ctrl+Alt+B` | Show background shell processes |
| `Escape` | Pause auto mode (preserves conversation) |
> **Note:** In terminals without Kitty keyboard protocol support (macOS Terminal.app, JetBrains IDEs), slash-command fallbacks are shown instead of `Ctrl+Alt` shortcuts.
## CLI Flags
| Flag | Description |
|------|-------------|
| `gsd` | Start a new interactive session |
| `gsd --continue` (`-c`) | Resume the most recent session for the current directory |
| `gsd config` | Re-run the setup wizard (LLM provider + tool keys) |

238
docs/configuration.md Normal file
View file

@ -0,0 +1,238 @@
# Configuration
GSD preferences live in `~/.gsd/preferences.md` (global) or `.gsd/preferences.md` (project-local). Manage interactively with `/gsd prefs`.
## Preferences File Format
Preferences use YAML frontmatter in a markdown file:
```yaml
---
version: 1
models:
research: claude-sonnet-4-6
planning: claude-opus-4-6
execution: claude-sonnet-4-6
completion: claude-sonnet-4-6
skill_discovery: suggest
auto_supervisor:
soft_timeout_minutes: 20
idle_timeout_minutes: 10
hard_timeout_minutes: 30
budget_ceiling: 50.00
token_profile: balanced
---
```
## Global vs Project Preferences
| Scope | Path | Applies to |
|-------|------|-----------|
| Global | `~/.gsd/preferences.md` | All projects |
| Project | `.gsd/preferences.md` | Current project only |
**Merge behavior:**
- **Scalar fields** (`skill_discovery`, `budget_ceiling`): project wins if defined
- **Array fields** (`always_use_skills`, etc.): concatenated (global first, then project)
- **Object fields** (`models`, `git`, `auto_supervisor`): shallow-merged, project overrides per-key
## All Settings
### `models`
Per-phase model selection. Each key accepts a model string or an object with fallbacks.
```yaml
models:
research: claude-sonnet-4-6
planning:
model: claude-opus-4-6
fallbacks:
- openrouter/z-ai/glm-5
execution: claude-sonnet-4-6
execution_simple: claude-haiku-4-5-20250414
completion: claude-sonnet-4-6
subagent: claude-sonnet-4-6
```
**Phases:** `research`, `planning`, `execution`, `execution_simple`, `completion`, `subagent`
- `execution_simple` — used for tasks classified as "simple" by the [complexity router](./token-optimization.md#complexity-based-task-routing)
- `subagent` — model for delegated subagent tasks (scout, researcher, worker)
- Provider targeting: use `provider/model` format (e.g., `bedrock/claude-sonnet-4-6`) or the `provider` field in object format
### `token_profile`
Coordinates model selection, phase skipping, and context compression. See [Token Optimization](./token-optimization.md).
Values: `budget`, `balanced` (default), `quality`
### `phases`
Fine-grained control over which phases run in auto mode:
```yaml
phases:
skip_research: false # skip milestone-level research
skip_reassess: false # skip roadmap reassessment after each slice
skip_slice_research: true # skip per-slice research
```
These are usually set automatically by `token_profile`, but can be overridden explicitly.
### `skill_discovery`
Controls how GSD finds and applies skills during auto mode.
| Value | Behavior |
|-------|----------|
| `auto` | Skills found and applied automatically |
| `suggest` | Skills identified during research but not auto-installed (default) |
| `off` | Skill discovery disabled |
### `auto_supervisor`
Timeout thresholds for auto mode supervision:
```yaml
auto_supervisor:
soft_timeout_minutes: 20 # warn LLM to wrap up
idle_timeout_minutes: 10 # detect stalls
hard_timeout_minutes: 30 # pause auto mode
```
### `budget_ceiling`
USD ceiling. Auto mode pauses when reached.
```yaml
budget_ceiling: 50.00
```
### `budget_enforcement`
How the budget ceiling is enforced:
| Value | Behavior |
|-------|----------|
| `warn` | Log a warning but continue |
| `pause` | Pause auto mode (default when ceiling is set) |
| `halt` | Stop auto mode entirely |
### `uat_dispatch`
Enable automatic UAT (User Acceptance Test) runs after slice completion:
```yaml
uat_dispatch: true
```
### `unique_milestone_ids`
Generate milestone IDs with a random suffix to avoid collisions in team workflows:
```yaml
unique_milestone_ids: true
# Produces: M001-eh88as instead of M001
```
### `git`
Git behavior configuration. All fields optional:
```yaml
git:
auto_push: false # push commits to remote after committing
push_branches: false # push milestone branch to remote
remote: origin # git remote name
snapshots: false # WIP snapshot commits during long tasks
pre_merge_check: false # run checks before worktree merge (true/false/"auto")
commit_type: feat # override conventional commit prefix
main_branch: main # primary branch name
commit_docs: true # commit .gsd/ artifacts to git (set false to keep local)
```
### `notifications`
Control what notifications GSD sends (for remote question integrations):
```yaml
notifications:
enabled: true
on_complete: true # notify on unit completion
on_error: true # notify on errors
on_budget: true # notify on budget thresholds
on_milestone: true # notify when milestone finishes
on_attention: true # notify when manual attention needed
```
### `remote_questions`
Route interactive questions to Slack or Discord for headless auto-mode:
```yaml
remote_questions:
channel: slack # or discord
channel_id: "C1234567890"
timeout_minutes: 15
poll_interval_seconds: 10
```
### `post_unit_hooks`
Custom hooks that fire after specific unit types complete:
```yaml
post_unit_hooks:
- name: code-review
after: [execute-task]
prompt: "Review the code changes for quality and security issues."
model: claude-opus-4-6
max_cycles: 1
```
### `pre_dispatch_hooks`
Hooks that intercept units before dispatch:
```yaml
pre_dispatch_hooks:
- name: add-context
before: [execute-task]
action: modify
prepend: "Remember to follow our coding standards document."
```
### `always_use_skills` / `prefer_skills` / `avoid_skills`
Skill routing preferences:
```yaml
always_use_skills:
- debug-like-expert
prefer_skills:
- frontend-design
avoid_skills: []
```
### `skill_rules`
Situational skill routing:
```yaml
skill_rules:
- when: task involves authentication
use: [clerk]
- when: frontend styling work
prefer: [frontend-design]
```
### `custom_instructions`
Durable instructions appended to every session:
```yaml
custom_instructions:
- "Always use TypeScript strict mode"
- "Prefer functional patterns over classes"
```

91
docs/cost-management.md Normal file
View file

@ -0,0 +1,91 @@
# Cost Management
GSD tracks token usage and cost for every unit of work dispatched during auto mode. This data powers the dashboard, budget enforcement, and cost projections.
## Cost Tracking
Every unit's metrics are captured automatically:
- **Token counts** — input, output, cache read, cache write, total
- **Cost** — USD cost per unit
- **Duration** — wall-clock time
- **Tool calls** — number of tool invocations
- **Message counts** — assistant and user messages
Data is stored in `.gsd/metrics.json` and survives across sessions.
### Viewing Costs
**Dashboard:** `Ctrl+Alt+G` or `/gsd status` shows real-time cost breakdown.
**Aggregations available:**
- By phase (research, planning, execution, completion, reassessment)
- By slice (M001/S01, M001/S02, ...)
- By model (which models consumed the most budget)
- Project totals
## Budget Ceiling
Set a maximum spend for a project:
```yaml
---
version: 1
budget_ceiling: 50.00
---
```
### Enforcement Modes
Control what happens when the ceiling is reached:
```yaml
budget_enforcement: pause # default when ceiling is set
```
| Mode | Behavior |
|------|----------|
| `warn` | Log a warning, continue executing |
| `pause` | Pause auto mode, wait for user action |
| `halt` | Stop auto mode entirely |
## Cost Projections
Once at least two slices have completed, GSD projects the remaining cost:
```
Projected remaining: $12.40 ($6.20/slice avg × 2 remaining)
```
Projections use per-slice averages from completed work. If the budget ceiling has been reached, a warning is appended.
## Budget Pressure & Model Downgrading
When approaching the budget ceiling, the [complexity router](./token-optimization.md#budget-pressure) automatically downgrades model assignments to cheaper tiers. This is graduated:
- **< 50% used** — no adjustment
- **50-75% used** — standard tasks downgrade to light
- **75-90% used** — same downgrade (standard → light), applied more aggressively
- **> 90% used** — nearly everything downgrades; only heavy tasks stay at standard
This ensures the budget is spread across remaining work instead of being exhausted early on complex tasks.
## Token Profiles & Cost
The `token_profile` preference directly affects cost:
| Profile | Typical Savings | How |
|---------|----------------|-----|
| `budget` | 40-60% | Cheaper models, phase skipping, minimal context |
| `balanced` | 10-20% | Default models, skip slice research, standard context |
| `quality` | 0% (baseline) | Full models, all phases, full context |
See [Token Optimization](./token-optimization.md) for details.
## Tips
- Start with `balanced` profile and a generous `budget_ceiling` to establish baseline costs
- Check `/gsd status` after a few slices to see per-slice cost averages
- Switch to `budget` profile for well-understood, repetitive work
- Use `quality` only when architectural decisions are being made
- Per-phase model selection lets you use Opus only for planning while keeping execution on Sonnet

133
docs/getting-started.md Normal file
View file

@ -0,0 +1,133 @@
# Getting Started
## Install
```bash
npm install -g gsd-pi
```
Requires Node.js ≥ 20.6.0 (22+ recommended) and Git.
## First Launch
Run `gsd` in any directory:
```bash
gsd
```
On first launch, GSD runs a setup wizard:
1. **LLM Provider** — select from 20+ providers (Anthropic, OpenAI, Google, OpenRouter, GitHub Copilot, Amazon Bedrock, Azure, and more). OAuth flows handle Claude Max and Copilot subscriptions automatically; otherwise paste an API key.
2. **Tool API Keys** (optional) — Brave Search, Context7, Jina, Slack, Discord. Press Enter to skip any.
If you have an existing Pi installation, provider credentials are imported automatically.
Re-run the wizard anytime with:
```bash
gsd config
```
## Choose a Model
GSD auto-selects a default model after login. Switch later with:
```
/model
```
Or configure per-phase models in preferences — see [Configuration](./configuration.md).
## Two Ways to Work
### Step Mode — `/gsd`
Type `/gsd` inside a session. GSD executes one unit of work at a time, pausing between each with a wizard showing what completed and what's next.
- **No `.gsd/` directory** → starts a discussion flow to capture your project vision
- **Milestone exists, no roadmap** → discuss or research the milestone
- **Roadmap exists, slices pending** → plan the next slice or execute a task
- **Mid-task** → resume where you left off
Step mode is the on-ramp. You stay in the loop, reviewing output between each step.
### Auto Mode — `/gsd auto`
Type `/gsd auto` and walk away. GSD autonomously researches, plans, executes, verifies, commits, and advances through every slice until the milestone is complete.
```
/gsd auto
```
See [Auto Mode](./auto-mode.md) for full details.
## Two Terminals, One Project
The recommended workflow: auto mode in one terminal, steering from another.
**Terminal 1 — let it build:**
```bash
gsd
/gsd auto
```
**Terminal 2 — steer while it works:**
```bash
gsd
/gsd discuss # talk through architecture decisions
/gsd status # check progress
/gsd queue # queue the next milestone
```
Both terminals read and write the same `.gsd/` files. Decisions in terminal 2 are picked up at the next phase boundary automatically.
## Project Structure
GSD organizes work into a hierarchy:
```
Milestone → a shippable version (4-10 slices)
Slice → one demoable vertical capability (1-7 tasks)
Task → one context-window-sized unit of work
```
The iron rule: **a task must fit in one context window.** If it can't, it's two tasks.
All state lives on disk in `.gsd/`:
```
.gsd/
PROJECT.md — what the project is right now
REQUIREMENTS.md — requirement contract (active/validated/deferred)
DECISIONS.md — append-only architectural decisions
STATE.md — quick-glance status
milestones/
M001/
M001-ROADMAP.md — slice plan with risk levels and dependencies
M001-CONTEXT.md — scope and goals from discussion
slices/
S01/
S01-PLAN.md — task decomposition
S01-SUMMARY.md — what happened
S01-UAT.md — human test script
tasks/
T01-PLAN.md
T01-SUMMARY.md
```
## Resume a Session
```bash
gsd --continue # or gsd -c
```
Resumes the most recent session for the current directory.
## Next Steps
- [Auto Mode](./auto-mode.md) — deep dive into autonomous execution
- [Configuration](./configuration.md) — model selection, timeouts, budgets
- [Commands Reference](./commands.md) — all commands and shortcuts

92
docs/git-strategy.md Normal file
View file

@ -0,0 +1,92 @@
# Git Strategy
GSD uses git worktrees for milestone isolation and sequential commits within each milestone. The strategy is fully automated — you don't need to manage branches manually.
## Branching Model
```
main ─────────────────────────────────────────────────────────
│ ↑
└── milestone/M001 (worktree) ────────────────────────┘
commit: feat(S01/T01): core types
commit: feat(S01/T02): markdown parser
commit: feat(S01/T03): file writer
commit: docs(M001/S01): workflow docs
...
→ squash-merged to main as single commit
```
### Key Properties
- **One worktree per milestone** — all work happens in `.gsd/worktrees/<MID>/`
- **Sequential commits on one branch** — no per-slice branches, no merge conflicts within a milestone
- **Squash merge to main** — when the milestone completes, all commits are squashed into one clean commit on main
- **Worktree teardown** — after merge, the worktree and branch are cleaned up
### Commit Format
Commits use conventional commit format with scope:
```
feat(S01/T01): core type definitions
feat(S01/T02): markdown parser for plan files
fix(M001/S03): bug fixes and doc corrections
docs(M001/S04): workflow documentation
```
## Worktree Management
### Automatic (Auto Mode)
Auto mode creates and manages worktrees automatically:
1. When a milestone starts, a worktree is created at `.gsd/worktrees/<MID>/` on branch `milestone/<MID>`
2. Planning artifacts from `.gsd/milestones/` are copied into the worktree
3. All execution happens inside the worktree
4. On milestone completion, the worktree is squash-merged to the integration branch
5. The worktree and branch are removed
### Manual
Use the `/worktree` (or `/wt`) command for manual worktree management:
```
/worktree create
/worktree switch
/worktree merge
/worktree remove
```
## Git Preferences
Configure git behavior in preferences:
```yaml
git:
auto_push: false # push after commits
push_branches: false # push milestone branch
remote: origin
snapshots: false # WIP snapshot commits
pre_merge_check: false # pre-merge validation
commit_type: feat # override commit type prefix
main_branch: main # primary branch name
commit_docs: true # commit .gsd/ to git
```
### `commit_docs: false`
When set to `false`, GSD adds `.gsd/` to `.gitignore` and keeps all planning artifacts local-only. Useful for teams where only some members use GSD, or when company policy requires a clean repository.
## Self-Healing
GSD includes automatic recovery for common git issues:
- **Detached HEAD** — automatically reattaches to the correct branch
- **Stale lock files** — removes `index.lock` files from crashed processes
- **Orphaned worktrees** — detects and offers to clean up abandoned worktrees
Run `/gsd doctor` to check git health manually.
## Native Git Operations
Since v2.16, GSD uses libgit2 via native bindings for read-heavy operations in the dispatch hot path. This eliminates ~70 process spawns per dispatch cycle, improving auto-mode throughput.

48
docs/migration.md Normal file
View file

@ -0,0 +1,48 @@
# Migration from v1
If you have projects with `.planning` directories from the original Get Shit Done (v1), you can migrate them to GSD-2's `.gsd` format.
## Running the Migration
```bash
# From within the project directory
/gsd migrate
# Or specify a path
/gsd migrate ~/projects/my-old-project
```
## What Gets Migrated
The migration tool:
- Parses your old `PROJECT.md`, `ROADMAP.md`, `REQUIREMENTS.md`, phase directories, plans, summaries, and research
- Maps phases → slices, plans → tasks, milestones → milestones
- Preserves completion state (`[x]` phases stay done, summaries carry over)
- Consolidates research files into the new structure
- Shows a preview before writing anything
- Optionally runs an agent-driven review of the output for quality assurance
## Supported Formats
The migration handles various v1 format variations:
- Milestone-sectioned roadmaps with `<details>` blocks
- Bold phase entries
- Bullet-format requirements
- Decimal phase numbering
- Duplicate phase numbers across milestones
## Requirements
Migration works best with a `ROADMAP.md` file for milestone structure. Without one, milestones are inferred from the `phases/` directory.
## Post-Migration
After migrating, verify the output with:
```
/gsd doctor
```
This checks `.gsd/` integrity and flags any structural issues.

84
docs/skills.md Normal file
View file

@ -0,0 +1,84 @@
# Skills
Skills are specialized instruction sets that GSD loads when the task matches. They provide domain-specific guidance for the LLM — coding patterns, framework idioms, testing strategies, and tool usage.
## Bundled Skills
GSD ships with these skills, installed to `~/.gsd/agent/skills/`:
| Skill | Trigger | Description |
|-------|---------|-------------|
| `frontend-design` | Web UI work — components, pages, dashboards, styling | Production-grade frontend with high design quality |
| `swiftui` | macOS/iOS apps — SwiftUI, Xcode, App Store | Full lifecycle from creation to shipping |
| `debug-like-expert` | Complex debugging — after standard approaches fail | Methodical investigation with evidence gathering |
| `rust-core` | Rust code — ownership, lifetimes, traits, async | Idiomatic, safe, performant Rust patterns |
| `axum-web-framework` | Axum web apps — routing, middleware, extractors | Complete Axum development guide |
| `axum-tests` | Testing Axum apps — integration tests, mock state | Test patterns for Axum applications |
| `tauri` | Tauri v2 desktop apps — setup, plugins, bundling | Cross-platform desktop app development |
| `tauri-ipc-developer` | Tauri IPC — React-Rust type-safe communication | Command scaffolding and serialization |
| `tauri-devtools` | Tauri debugging — CrabNebula DevTools integration | Profiling and monitoring |
| `github-workflows` | GitHub Actions — CI/CD, workflow debugging | Live syntax, run monitoring, failure diagnosis |
| `security-audit` | Security auditing — dependency scanning, OWASP | Comprehensive security assessment |
| `security-review` | Code security review — injection, XSS, auth flaws | Vulnerability-focused code review |
| `security-docker` | Docker security — Dockerfile, runtime hardening | Container security best practices |
## Skill Discovery
The `skill_discovery` preference controls how GSD finds skills during auto mode:
| Mode | Behavior |
|------|----------|
| `auto` | Skills are found and applied automatically |
| `suggest` | Skills are identified but require confirmation (default) |
| `off` | No skill discovery |
## Skill Preferences
Control which skills are used via preferences:
```yaml
---
version: 1
always_use_skills:
- debug-like-expert
prefer_skills:
- frontend-design
avoid_skills:
- security-docker
skill_rules:
- when: task involves Clerk authentication
use: [clerk]
- when: frontend styling work
prefer: [frontend-design]
---
```
### Resolution Order
Skills can be referenced by:
1. **Bare name** — e.g., `frontend-design` → scans `~/.gsd/agent/skills/` and project skills
2. **Absolute path** — e.g., `/Users/you/.gsd/agent/skills/my-skill/SKILL.md`
3. **Directory path** — e.g., `~/custom-skills/my-skill` → looks for `SKILL.md` inside
User skills (`~/.gsd/agent/skills/`) take precedence over project skills.
## Custom Skills
Create your own skills by adding a directory with a `SKILL.md` file:
```
~/.gsd/agent/skills/my-skill/
SKILL.md — instructions for the LLM
references/ — optional reference files
```
The `SKILL.md` file contains instructions the LLM follows when the skill is active. Reference files can be loaded by the skill instructions as needed.
### Project-Local Skills
Place skills in your project for project-specific guidance:
```
.pi/agent/skills/my-project-skill/
SKILL.md
```

266
docs/token-optimization.md Normal file
View file

@ -0,0 +1,266 @@
# Token Optimization
*Introduced in v2.17.0*
GSD 2.17 introduces a coordinated token optimization system that can reduce token usage by 40-60% without sacrificing output quality for most workloads. The system has three pillars: **token profiles**, **context compression**, and **complexity-based task routing**.
## Token Profiles
A token profile is a single preference that coordinates model selection, phase skipping, and context compression level. Set it in your preferences:
```yaml
---
version: 1
token_profile: balanced
---
```
Three profiles are available:
### `budget` — Maximum Savings (40-60% reduction)
Optimized for cost-sensitive workflows. Uses cheaper models, skips optional phases, and compresses dispatch context to the minimum needed.
| Dimension | Setting |
|-----------|---------|
| Planning model | Sonnet |
| Execution model | Sonnet |
| Simple task model | Haiku |
| Completion model | Haiku |
| Subagent model | Haiku |
| Milestone research | **Skipped** |
| Slice research | **Skipped** |
| Roadmap reassessment | **Skipped** |
| Context inline level | **Minimal** — drops decisions, requirements, extra templates |
Best for: prototyping, small projects, well-understood codebases, cost-conscious iteration.
### `balanced` — Smart Defaults (default)
The default profile. Keeps the important phases, skips the ones with diminishing returns for most projects, and uses standard context compression.
| Dimension | Setting |
|-----------|---------|
| Planning model | User's default |
| Execution model | User's default |
| Simple task model | User's default |
| Completion model | User's default |
| Subagent model | Sonnet |
| Milestone research | Runs |
| Slice research | **Skipped** |
| Roadmap reassessment | Runs |
| Context inline level | **Standard** — includes key context, drops low-signal extras |
Best for: most projects, day-to-day development.
### `quality` — Full Context (no compression)
Every phase runs. Every context artifact is inlined. No shortcuts.
| Dimension | Setting |
|-----------|---------|
| All models | User's configured defaults |
| All phases | Run |
| Context inline level | **Full** — everything inlined |
Best for: complex architectures, greenfield projects requiring deep research, critical production work.
## Context Compression
Each token profile maps to an **inline level** that controls how much context is pre-loaded into dispatch prompts:
| Profile | Inline Level | What's Included |
|---------|-------------|-----------------|
| `budget` | `minimal` | Task plan, essential prior summaries (truncated). Drops decisions register, requirements, UAT template, secrets manifest. |
| `balanced` | `standard` | Task plan, prior summaries, slice plan, roadmap excerpt. Drops some supplementary templates. |
| `quality` | `full` | Everything — all plans, summaries, decisions, requirements, templates, and root files. |
### How Compression Works
Dispatch prompt builders accept an `inlineLevel` parameter. At each level, specific artifacts are gated:
**Minimal level reductions:**
- `buildExecuteTaskPrompt` — drops the decisions template, truncates prior summaries to the most recent one
- `buildPlanMilestonePrompt` — drops `PROJECT.md`, `REQUIREMENTS.md`, decisions, and supplementary templates like `secrets-manifest`
- `buildCompleteSlicePrompt` — drops requirements and UAT template inlining
- `buildCompleteMilestonePrompt` — drops root GSD file inlining
- `buildReassessRoadmapPrompt` — drops project, requirements, and decisions files
These are cumulative — `standard` drops a subset, `minimal` drops more. The `full` level preserves all context (the pre-2.17 behavior).
### Overriding Inline Level
The inline level is derived from your `token_profile`. To control phases independently of the profile, use the `phases` preference:
```yaml
---
version: 1
token_profile: budget
phases:
skip_research: false # override: run research even on budget
---
```
Explicit `phases` settings always override the profile defaults.
## Complexity-Based Task Routing
GSD automatically classifies each task by complexity and routes it to an appropriate model tier. This means simple documentation fixes don't burn expensive Opus tokens, while complex architectural work gets the reasoning power it needs.
### How Classification Works
Tasks are classified by analyzing the task plan:
| Signal | Simple | Standard | Complex |
|--------|--------|----------|---------|
| Step count | ≤ 3 | 4-7 | ≥ 8 |
| File count | ≤ 3 | 4-7 | ≥ 8 |
| Description length | < 500 chars | 500-2000 | > 2000 chars |
| Code blocks | — | — | ≥ 5 |
| Signal words | None | Any present | — |
**Signal words** that prevent simple classification: `research`, `investigate`, `refactor`, `migrate`, `integrate`, `complex`, `architect`, `redesign`, `security`, `performance`, `concurrent`, `parallel`, `distributed`, `backward compat`, `migration`, `architecture`, `concurrency`, `compatibility`.
Empty or malformed plans default to `standard` (conservative).
### Unit Type Defaults
Non-task units have built-in tier assignments:
| Unit Type | Default Tier |
|-----------|-------------|
| `complete-slice`, `run-uat` | Light |
| `research-*`, `plan-*`, `execute-task`, `complete-milestone` | Standard |
| `replan-slice`, `reassess-roadmap` | Heavy |
| `hook/*` | Light |
### Model Routing
Each tier maps to a model configuration:
| Tier | Model Phase Key | Typical Model |
|------|----------------|---------------|
| Light | `completion` | Haiku (budget) / user default |
| Standard | `execution` | Sonnet / user default |
| Heavy | `execution` | Opus / user default |
Simple tasks use the `execution_simple` model key when configured. This is set automatically by the `budget` profile to Haiku.
### Budget Pressure
When approaching your budget ceiling, the classifier automatically downgrades tiers:
| Budget Used | Effect |
|------------|--------|
| < 50% | No adjustment |
| 50-75% | Standard → Light |
| 75-90% | Standard → Light |
| > 90% | Everything except Heavy → Light; Heavy → Standard |
This graduated approach preserves model quality for the most complex work while progressively reducing cost as the ceiling approaches.
## Adaptive Learning (Routing History)
GSD tracks the success and failure of each tier assignment over time and adjusts future classifications accordingly. This is opt-in — it happens automatically and persists in `.gsd/routing-history.json`.
### How It Works
1. After each unit completes, the outcome (success/failure) is recorded against the unit type and tier used
2. Outcomes are tracked per-pattern (e.g., `execute-task`, `execute-task:docs`) with a rolling window of the last 50 entries
3. If a tier's failure rate exceeds 20% for a given pattern, future classifications for that pattern are bumped up one tier
4. The system also accepts tag-specific patterns (e.g., `execute-task:test` vs `execute-task:frontend`) for more granular routing
### User Feedback
GSD accepts manual feedback to accelerate learning:
- **"over"** — the model was overpowered for this task (encourages downgrading)
- **"under"** — the model wasn't capable enough (encourages upgrading)
- **"ok"** — correct assignment (no adjustment)
Feedback signals are weighted 2× compared to automatic outcomes.
### Data Management
```bash
# Routing history is stored per-project
.gsd/routing-history.json
# Clear history to reset adaptive learning
# (happens via the routing-history module API)
```
The feedback array is capped at 200 entries. Per-pattern outcome counts use a rolling window of 50 to prevent stale data from dominating.
## Configuration Examples
### Cost-Optimized Setup
```yaml
---
version: 1
token_profile: budget
budget_ceiling: 25.00
models:
execution_simple: claude-haiku-4-5-20250414
---
```
### Balanced with Custom Models
```yaml
---
version: 1
token_profile: balanced
models:
planning:
model: claude-opus-4-6
fallbacks:
- openrouter/z-ai/glm-5
execution: claude-sonnet-4-6
---
```
### Full Quality for Critical Work
```yaml
---
version: 1
token_profile: quality
models:
planning: claude-opus-4-6
execution: claude-opus-4-6
---
```
### Per-Phase Overrides
The `token_profile` sets defaults, but explicit preferences always win:
```yaml
---
version: 1
token_profile: budget
phases:
skip_research: false # override: keep milestone research
models:
planning: claude-opus-4-6 # override: use Opus for planning despite budget profile
---
```
## How the Pieces Fit Together
```
preferences.md
└─ token_profile: balanced
├─ resolveProfileDefaults() → model defaults + phase skip defaults
├─ resolveInlineLevel() → standard
│ └─ prompt builders gate context inclusion by level
└─ classifyUnitComplexity() → routes to execution/execution_simple model
├─ task plan analysis (steps, files, signals)
├─ unit type defaults
├─ budget pressure adjustment
└─ adaptive learning from routing-history.json
```
The profile is resolved once and flows through the entire dispatch pipeline. Explicit preferences override profile defaults at every layer.

114
docs/troubleshooting.md Normal file
View file

@ -0,0 +1,114 @@
# Troubleshooting
## `/gsd doctor`
The built-in diagnostic tool validates `.gsd/` integrity:
```
/gsd doctor
```
It checks:
- File structure and naming conventions
- Roadmap ↔ slice ↔ task referential integrity
- Completion state consistency
- Git worktree health
- Stale lock files and orphaned runtime records
## Common Issues
### Auto mode loops on the same unit
**Symptoms:** The same unit (e.g., `research-slice` or `plan-slice`) dispatches repeatedly until hitting the dispatch limit.
**Causes:**
- Stale cache after a crash — the in-memory file listing doesn't reflect new artifacts
- The LLM didn't produce the expected artifact file
**Fix:** Run `/gsd doctor` to repair state, then resume with `/gsd auto`. If the issue persists, check that the expected artifact file exists on disk.
### Auto mode stops with "Loop detected"
**Cause:** A unit failed to produce its expected artifact twice in a row.
**Fix:** Check the task plan for clarity. If the plan is ambiguous, refine it manually, then `/gsd auto` to resume.
### Wrong files in worktree
**Symptoms:** Planning artifacts or code appear in the wrong directory.
**Cause:** The LLM wrote to the main repo instead of the worktree.
**Fix:** This was fixed in v2.14+. If you're on an older version, update. The dispatch prompt now includes explicit working directory instructions.
### `npm install -g gsd-pi` fails
**Common causes:**
- Missing workspace packages — fixed in v2.10.4+
- `postinstall` hangs on Linux (Playwright `--with-deps` triggering sudo) — fixed in v2.3.6+
- Node.js version too old — requires ≥ 20.6.0
### Provider errors during auto mode
**Symptoms:** Auto mode pauses with a provider error (rate limit, auth failure, etc.).
**Fix:** GSD automatically tries fallback models if configured. To add fallbacks:
```yaml
models:
execution:
model: claude-sonnet-4-6
fallbacks:
- openrouter/minimax/minimax-m2.5
```
### Budget ceiling reached
**Symptoms:** Auto mode pauses with "Budget ceiling reached."
**Fix:** Increase `budget_ceiling` in preferences, or switch to `budget` token profile to reduce per-unit cost, then resume with `/gsd auto`.
### Stale lock file
**Symptoms:** Auto mode won't start, says another session is running.
**Fix:** If no other session is actually running, delete `.gsd/auto.lock` manually. GSD includes stale lock detection (checks if the PID is still alive), but edge cases exist.
### Git merge conflicts
**Symptoms:** Worktree merge fails on `.gsd/` files.
**Fix:** GSD auto-resolves conflicts on `.gsd/` runtime files. For content conflicts in code files, the LLM is given an opportunity to resolve them via a fix-merge session. If that fails, manual resolution is needed.
## Recovery Procedures
### Reset auto mode state
```bash
rm .gsd/auto.lock
rm .gsd/completed-units.json
```
Then `/gsd auto` to restart from current disk state.
### Reset routing history
If adaptive model routing is producing bad results, clear the routing history:
```bash
rm .gsd/routing-history.json
```
### Full state rebuild
```
/gsd doctor
```
Doctor rebuilds `STATE.md` from plan and roadmap files on disk and fixes detected inconsistencies.
## Getting Help
- **GitHub Issues:** [github.com/gsd-build/GSD-2/issues](https://github.com/gsd-build/GSD-2/issues)
- **Dashboard:** `Ctrl+Alt+G` or `/gsd status` for real-time diagnostics
- **Session logs:** `.gsd/activity/` contains JSONL session dumps for crash forensics

99
docs/working-in-teams.md Normal file
View file

@ -0,0 +1,99 @@
# Working in Teams
GSD supports multi-user workflows where several developers work on the same repository concurrently.
## Setup
### 1. Enable Unique Milestone IDs
Prevent ID collisions when multiple developers create milestones:
```yaml
# .gsd/preferences.md (project-level, committed to git)
---
version: 1
unique_milestone_ids: true
---
```
This generates milestone IDs like `M001-eh88as` instead of plain `M001`. The random suffix ensures no two developers clash.
### 2. Configure `.gitignore`
Share planning artifacts (milestones, roadmaps, decisions) while keeping runtime files local:
```bash
# ── GSD: Runtime / Ephemeral (per-developer, per-session) ──────
.gsd/auto.lock
.gsd/completed-units.json
.gsd/STATE.md
.gsd/metrics.json
.gsd/activity/
.gsd/runtime/
.gsd/worktrees/
.gsd/milestones/**/continue.md
.gsd/milestones/**/*-CONTINUE.md
```
**What gets shared** (committed to git):
- `.gsd/preferences.md` — project preferences
- `.gsd/PROJECT.md` — living project description
- `.gsd/REQUIREMENTS.md` — requirement contract
- `.gsd/DECISIONS.md` — architectural decisions
- `.gsd/milestones/` — roadmaps, plans, summaries, research
**What stays local** (gitignored):
- Lock files, metrics, state cache, runtime records, worktrees, activity logs
### 3. Commit the Preferences
```bash
git add .gsd/preferences.md
git commit -m "chore: enable GSD team workflow"
```
## `commit_docs: false`
For teams where only some members use GSD, or when company policy requires a clean repo:
```yaml
git:
commit_docs: false
```
This adds `.gsd/` to `.gitignore` entirely and keeps all artifacts local. The developer gets the benefits of structured planning without affecting teammates who don't use GSD.
## Migrating an Existing Project
If you have an existing project with `.gsd/` blanket-ignored:
1. Ensure no milestones are in progress (clean state)
2. Update `.gitignore` to use the selective pattern above
3. Add `unique_milestone_ids: true` to `.gsd/preferences.md`
4. Optionally rename existing milestones to use unique IDs:
```
I have turned on unique milestone ids, please update all old milestone
ids to use this new format e.g. M001-abc123 where abc123 is a random
6-char lowercase alphanumeric string. Update all references in all
.gsd file contents, file names and directory names. Validate your work
once done to ensure referential integrity.
```
5. Commit
## Parallel Development
Multiple developers can run auto mode simultaneously on different milestones. Each developer:
- Gets their own worktree (`.gsd/worktrees/<MID>/`, gitignored)
- Works on a unique `milestone/<MID>` branch
- Squash-merges to main independently
Milestone dependencies can be declared in `M00X-CONTEXT.md` frontmatter:
```yaml
---
depends_on: [M001-eh88as]
---
```
GSD enforces that dependent milestones complete before starting downstream work.

File diff suppressed because it is too large Load diff

View file

@ -47,6 +47,9 @@ pub struct ParsedGsdFile {
pub body: String,
/// Map of section heading -> content, serialized as JSON.
pub sections: String,
/// Original raw file content.
#[napi(js_name = "rawContent")]
pub raw_content: String,
}
/// Batch parse result.
@ -769,6 +772,7 @@ pub fn batch_parse_gsd_files(directory: String) -> Result<BatchParseResult> {
metadata,
body: body.to_string(),
sections: sections_json,
raw_content: content.clone(),
});
}
@ -831,6 +835,546 @@ pub fn parse_roadmap_file(content: String) -> NativeRoadmap {
parse_roadmap_internal(&content)
}
// ─── GSD Tree Scanner ───────────────────────────────────────────────────────
/// A single entry (file or directory) discovered while scanning a GSD tree.
#[napi(object)]
pub struct GsdTreeEntry {
/// Path relative to the scan root, lossily converted to UTF-8.
pub path: String,
/// Final path component (file or directory name).
pub name: String,
/// True for directories, as reported by `DirEntry::file_type` (which does
/// not follow symlinks, so a symlinked directory reports false).
#[napi(js_name = "isDir")]
pub is_dir: bool,
}
/// Recursively scans `directory` and returns every file and directory found.
///
/// A non-existent root is not an error — it yields an empty list. Errors are
/// reported only when the root itself cannot be read.
#[napi(js_name = "scanGsdTree")]
pub fn scan_gsd_tree(directory: String) -> Result<Vec<GsdTreeEntry>> {
    let root = Path::new(&directory);
    // Missing root: report an empty tree rather than failing.
    if !root.exists() {
        return Ok(Vec::new());
    }
    let mut collected: Vec<GsdTreeEntry> = Vec::new();
    collect_tree_entries(root, root, &mut collected)?;
    Ok(collected)
}
/// Recursively collects entries under `dir`, recording paths relative to `base`.
///
/// Per-entry failures (stat errors, files deleted mid-iteration) are skipped.
/// An unreadable nested subdirectory is likewise skipped so one bad directory
/// cannot abort the whole scan; only failure to read `dir` itself is returned,
/// which keeps `scan_gsd_tree`'s error for an unreadable root.
fn collect_tree_entries(base: &Path, dir: &Path, entries: &mut Vec<GsdTreeEntry>) -> Result<()> {
    let read_dir = match std::fs::read_dir(dir) {
        Ok(rd) => rd,
        Err(e) => {
            return Err(napi::Error::from_reason(format!(
                "Failed to read directory {}: {}",
                dir.display(),
                e
            )));
        }
    };
    for entry in read_dir {
        // Skip entries that disappear or fail to stat during iteration.
        let entry = match entry {
            Ok(e) => e,
            Err(_) => continue,
        };
        let path = entry.path();
        let file_type = match entry.file_type() {
            Ok(ft) => ft,
            Err(_) => continue,
        };
        // Report paths relative to the scan root; fall back to the full path
        // if stripping fails (should not happen for descendants of `base`).
        let relative = path
            .strip_prefix(base)
            .unwrap_or(&path)
            .to_string_lossy()
            .to_string();
        let name = entry.file_name().to_string_lossy().to_string();
        // `file_type()` does not follow symlinks, so a symlinked directory is
        // recorded as a non-directory and never recursed into (no cycles).
        let is_dir = file_type.is_dir();
        entries.push(GsdTreeEntry {
            path: relative,
            name,
            is_dir,
        });
        if is_dir {
            // Best-effort recursion: an unreadable subdirectory must not
            // abort the scan (matches the per-entry tolerance above).
            let _ = collect_tree_entries(base, &path, entries);
        }
    }
    Ok(())
}
// ─── JSONL Tail Parser ──────────────────────────────────────────────────────
/// Result of reading the tail of a JSONL file.
#[napi(object)]
pub struct JsonlParseResult {
/// JSON array string built from the valid entry lines, in file order.
pub entries: String,
/// Number of entries contained in `entries`.
pub count: u32,
/// True when the file exceeded the byte budget and only a tail was read.
#[napi(js_name = "truncated")]
pub truncated: bool,
}
/// Reads the tail of a JSONL file and returns its valid JSON entries.
///
/// At most `max_bytes` (default 10 MB) are read from the end of the file.
/// A partial first line produced by the byte-offset seek is discarded by the
/// JSON validation pass. When `max_entries` is set, only the last N valid
/// entries are kept. `entries` is a JSON array string assembled from the raw
/// lines without re-serializing.
#[napi(js_name = "parseJsonlTail")]
pub fn parse_jsonl_tail(
    file_path: String,
    max_bytes: Option<u32>,
    max_entries: Option<u32>,
) -> Result<JsonlParseResult> {
    use std::io::{Read, Seek, SeekFrom};
    let max_bytes = max_bytes.unwrap_or(10 * 1024 * 1024) as u64; // default 10MB
    let max_entries = max_entries.map(|m| m as usize);
    let mut file = match std::fs::File::open(&file_path) {
        Ok(f) => f,
        Err(e) => {
            return Err(napi::Error::from_reason(format!(
                "Failed to open file {}: {}",
                file_path, e
            )));
        }
    };
    let file_len = file
        .metadata()
        .map_err(|e| napi::Error::from_reason(format!("Failed to get file metadata: {}", e)))?
        .len();
    let truncated = file_len > max_bytes;
    if truncated {
        // The byte offset may split a JSONL line or even a multi-byte UTF-8
        // character; both cases are handled when decoding/validating below.
        file.seek(SeekFrom::Start(file_len - max_bytes))
            .map_err(|e| napi::Error::from_reason(format!("Failed to seek: {}", e)))?;
    }
    // Read raw bytes and decode lossily. Using read_to_string here would fail
    // with InvalidData whenever the seek above lands inside a multi-byte
    // UTF-8 character, rejecting perfectly valid files.
    let mut bytes = Vec::new();
    file.read_to_end(&mut bytes)
        .map_err(|e| napi::Error::from_reason(format!("Failed to read file: {}", e)))?;
    let content = String::from_utf8_lossy(&bytes);
    let mut valid_entries: Vec<&str> = Vec::new();
    for line in content.split('\n') {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        // Keep only syntactically valid JSON lines; this also drops the
        // partial first line created by the tail seek.
        if serde_json::from_str::<serde_json::Value>(trimmed).is_ok() {
            valid_entries.push(trimmed);
        }
    }
    // If max_entries is set, keep only the last N entries.
    if let Some(max) = max_entries {
        if valid_entries.len() > max {
            let skip = valid_entries.len() - max;
            valid_entries.drain(..skip);
        }
    }
    let count = valid_entries.len() as u32;
    // Assemble the JSON array from the already-validated raw lines.
    let mut entries_json = String::from("[");
    entries_json.push_str(&valid_entries.join(","));
    entries_json.push(']');
    Ok(JsonlParseResult {
        entries: entries_json,
        count,
        truncated,
    })
}
// ─── Plan File Parser ───────────────────────────────────────────────────────
/// One task entry parsed from a plan's "## Tasks" checkbox list.
#[napi(object)]
pub struct NativeTaskEntry {
/// Task id from the bold heading (e.g. "T01"); empty when no "ID: " prefix.
pub id: String,
/// Task title from the bold heading.
pub title: String,
/// First plain sub-bullet under the task; empty when none.
pub description: String,
/// True when the checkbox is marked `[x]` or `[X]`.
pub done: bool,
/// Value of the optional `est:…` backtick span; empty when absent.
pub estimate: String,
/// Comma-separated values from a "- Files:" sub-bullet.
pub files: Vec<String>,
/// Text from a "- Verify:" sub-bullet.
pub verify: String,
}
/// Structured form of a GSD plan markdown file.
#[napi(object)]
pub struct NativePlan {
/// Frontmatter `id` when present, otherwise the id part of the "# ID: Title" heading.
pub id: String,
/// Title part of the h1 heading.
pub title: String,
/// Value of the bold "Goal" field; empty when absent.
pub goal: String,
/// Value of the bold "Demo" field; empty when absent.
pub demo: String,
/// Bullets from the "## Must-Haves" section.
#[napi(js_name = "mustHaves")]
pub must_haves: Vec<String>,
/// Tasks parsed from the "## Tasks" section.
pub tasks: Vec<NativeTaskEntry>,
/// Bullets from the "## Files Likely Touched" section.
#[napi(js_name = "filesLikelyTouched")]
pub files_likely_touched: Vec<String>,
}
/// Parses a GSD plan markdown file into its structured form.
///
/// The plan id comes from the frontmatter `id` key when present, otherwise
/// from the "# ID: Title" heading. Goal/Demo are bold fields; Must-Haves and
/// Files Likely Touched are bulleted h2 sections; Tasks are checkbox entries.
#[napi(js_name = "parsePlanFile")]
pub fn parse_plan_file(content: String) -> NativePlan {
    let (fm_lines, body) = split_frontmatter_internal(&content);
    let fm_map = fm_lines
        .map(|lines| parse_frontmatter_map_internal(&lines))
        .unwrap_or_default();
    // Frontmatter id (first scalar `id` entry) wins over the heading id.
    let mut fm_id: Option<String> = None;
    for (key, value) in fm_map.iter() {
        if key == "id" {
            if let FmValue::Scalar(s) = value {
                fm_id = Some(s.clone());
                break;
            }
        }
    }
    // Heading line "# ID: Title"; a heading without ": " is all title.
    let mut heading_id = String::new();
    let mut title = String::new();
    if let Some(line) = body.lines().find(|l| l.starts_with("# ")) {
        let heading = line[2..].trim();
        match heading.find(": ") {
            Some(pos) => {
                heading_id = heading[..pos].trim().to_string();
                title = heading[pos + 2..].trim().to_string();
            }
            None => title = heading.to_string(),
        }
    }
    NativePlan {
        id: fm_id.unwrap_or(heading_id),
        title,
        goal: extract_bold_field(body, "Goal").unwrap_or("").to_string(),
        demo: extract_bold_field(body, "Demo").unwrap_or("").to_string(),
        must_haves: extract_section_internal(body, "Must-Haves", 2)
            .map(|s| parse_bullets(&s))
            .unwrap_or_default(),
        tasks: parse_plan_tasks(body),
        files_likely_touched: extract_section_internal(body, "Files Likely Touched", 2)
            .map(|s| parse_bullets(&s))
            .unwrap_or_default(),
    }
}
/// Parses the "## Tasks" section of a plan body into task entries.
///
/// Task lines look like: `- [x] **T01: Task Title** \`est:2h\``. Sub-bullets
/// after a task line ("- Files: a, b", "- Verify: …", or a plain "- text"
/// description) attach to the most recently parsed task.
fn parse_plan_tasks(body: &str) -> Vec<NativeTaskEntry> {
let tasks_section = match extract_section_internal(body, "Tasks", 2) {
Some(s) => s,
None => return Vec::new(),
};
let mut tasks: Vec<NativeTaskEntry> = Vec::new();
for line in tasks_section.lines() {
let trimmed = line.trim();
// Check for task checkbox line: - [x] **T01: Task Title** `est:2h`
if trimmed.starts_with("- [") && trimmed.len() > 4 {
// Fourth character is the checkbox state; 'x'/'X' marks done.
let done_char = trimmed.chars().nth(3).unwrap_or(' ');
let done = done_char == 'x' || done_char == 'X';
let after_bracket = match trimmed.find("] ") {
Some(pos) => &trimmed[pos + 2..],
None => continue,
};
// The id/title must be bold: **T01: Title**; skip otherwise.
if !after_bracket.starts_with("**") {
continue;
}
let bold_end = match after_bracket[2..].find("**") {
Some(pos) => pos,
None => continue,
};
let bold_content = &after_bracket[2..2 + bold_end];
// Split "ID: Title"; without ": " the whole bold text is the title.
let (id, title) = if let Some(colon_pos) = bold_content.find(": ") {
(
bold_content[..colon_pos].trim().to_string(),
bold_content[colon_pos + 2..].trim().to_string(),
)
} else {
(String::new(), bold_content.to_string())
};
let after_bold = &after_bracket[2 + bold_end + 2..];
// Optional `est:…` backtick span after the bold title. An
// unterminated span yields val_end == val_start, i.e. empty estimate.
let estimate = if let Some(est_start) = after_bold.find("`est:") {
let val_start = est_start + 5;
let val_end = after_bold[val_start..]
.find('`')
.unwrap_or(0)
+ val_start;
after_bold[val_start..val_end].to_string()
} else {
String::new()
};
tasks.push(NativeTaskEntry {
id,
title,
description: String::new(),
done,
estimate,
files: Vec::new(),
verify: String::new(),
});
continue;
}
// Sub-items under a task
if let Some(task) = tasks.last_mut() {
// "- Files:" / "- files:" are both 8 bytes, so the [8..] slice is safe.
if trimmed.starts_with("- Files:") || trimmed.starts_with("- files:") {
let files_str = trimmed[8..].trim();
task.files = files_str
.split(',')
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.collect();
} else if trimmed.starts_with("- Verify:") || trimmed.starts_with("- verify:") {
task.verify = trimmed[9..].trim().to_string();
} else if trimmed.starts_with("- ") && !trimmed.starts_with("- [") {
// Description line
// Only the first plain bullet becomes the description.
if task.description.is_empty() {
task.description = trimmed[2..].trim().to_string();
}
}
}
}
tasks
}
// ─── Summary File Parser ────────────────────────────────────────────────────
/// One "- `path` — description" bullet from a summary's files section.
#[napi(object)]
pub struct NativeFileModified {
/// File path from the backtick-quoted span.
pub path: String,
/// Text after the path, with any leading dash separator stripped.
pub description: String,
}
/// Frontmatter metadata of a GSD summary file. Each field is populated from
/// the same-named frontmatter key; missing keys yield empty strings/vectors.
#[napi(object)]
pub struct NativeSummaryFrontmatter {
pub id: String,
pub parent: String,
pub milestone: String,
pub provides: Vec<String>,
pub affects: Vec<String>,
#[napi(js_name = "keyFiles")]
pub key_files: Vec<String>,
#[napi(js_name = "keyDecisions")]
pub key_decisions: Vec<String>,
#[napi(js_name = "patternsEstablished")]
pub patterns_established: Vec<String>,
#[napi(js_name = "drillDownPaths")]
pub drill_down_paths: Vec<String>,
#[napi(js_name = "observabilitySurfaces")]
pub observability_surfaces: Vec<String>,
pub duration: String,
#[napi(js_name = "verificationResult")]
pub verification_result: String,
/// Completion timestamp as the raw frontmatter string (format not enforced here).
#[napi(js_name = "completedAt")]
pub completed_at: String,
/// True when `blocker_discovered` is a truthy scalar ("true"/"yes" variants).
#[napi(js_name = "blockerDiscovered")]
pub blocker_discovered: bool,
}
/// Structured form of a GSD summary markdown file.
#[napi(object)]
pub struct NativeSummary {
/// Parsed frontmatter metadata.
pub frontmatter: NativeSummaryFrontmatter,
/// Text of the h1 heading.
pub title: String,
/// First fully-bold line following the h1, without the `**` markers.
#[napi(js_name = "oneLiner")]
pub one_liner: String,
/// Raw content of the "## What Happened" section.
#[napi(js_name = "whatHappened")]
pub what_happened: String,
/// Raw content of the "## Deviations" section.
pub deviations: String,
/// Entries from "## Files Created/Modified" (or "## Files Modified").
#[napi(js_name = "filesModified")]
pub files_modified: Vec<NativeFileModified>,
}
/// Parses a GSD summary markdown file into its structured form.
///
/// Extracts frontmatter metadata, the h1 title, the bold one-liner directly
/// after the h1, and the "What Happened" / "Deviations" /
/// "Files Created/Modified" (fallback "Files Modified") h2 sections.
#[napi(js_name = "parseSummaryFile")]
pub fn parse_summary_file(content: String) -> NativeSummary {
    let (fm_lines, body) = split_frontmatter_internal(&content);
    let fm_map = fm_lines
        .map(|lines| parse_frontmatter_map_internal(&lines))
        .unwrap_or_default();
    let frontmatter = parse_summary_frontmatter(&fm_map);
    let title = body
        .lines()
        .find(|l| l.starts_with("# "))
        .map(|l| l[2..].trim().to_string())
        .unwrap_or_default();
    // One-liner: first fully-bold line after the h1. Scanning stops at the
    // first non-empty, non-heading line so later prose is never picked up.
    let one_liner = {
        let mut found_h1 = false;
        let mut result = String::new();
        for line in body.lines() {
            if line.starts_with("# ") {
                found_h1 = true;
                continue;
            }
            if found_h1 {
                let trimmed = line.trim();
                // Require at least "**x**": the len >= 4 guard prevents a
                // bare "**" or "***" line from panicking on [2..len - 2].
                if trimmed.len() >= 4 && trimmed.starts_with("**") && trimmed.ends_with("**") {
                    result = trimmed[2..trimmed.len() - 2].to_string();
                    break;
                }
                if !trimmed.is_empty() && !trimmed.starts_with('#') {
                    break;
                }
            }
        }
        result
    };
    let what_happened = extract_section_internal(body, "What Happened", 2)
        .unwrap_or_default();
    let deviations = extract_section_internal(body, "Deviations", 2)
        .unwrap_or_default();
    let files_modified = extract_section_internal(body, "Files Created/Modified", 2)
        .or_else(|| extract_section_internal(body, "Files Modified", 2))
        .map(|s| parse_files_modified(&s))
        .unwrap_or_default();
    NativeSummary {
        frontmatter,
        title,
        one_liner,
        what_happened,
        deviations,
        files_modified,
    }
}
/// Converts parsed frontmatter key/value pairs into `NativeSummaryFrontmatter`.
/// Missing or wrongly-typed keys yield empty strings / empty vectors.
fn parse_summary_frontmatter(fm_map: &[(String, FmValue)]) -> NativeSummaryFrontmatter {
    // First scalar value for `key`, or "" when absent or not a scalar.
    let get_scalar = |key: &str| -> String {
        fm_map
            .iter()
            .find_map(|(k, v)| {
                if k == key {
                    if let FmValue::Scalar(s) = v {
                        Some(s.clone())
                    } else {
                        None
                    }
                } else {
                    None
                }
            })
            .unwrap_or_default()
    };
    // First array value for `key`, keeping only its string items.
    let get_string_array = |key: &str| -> Vec<String> {
        fm_map
            .iter()
            .find_map(|(k, v)| {
                if k == key {
                    if let FmValue::Array(items) = v {
                        Some(
                            items
                                .iter()
                                .filter_map(|item| {
                                    if let FmArrayItem::Str(s) = item {
                                        Some(s.clone())
                                    } else {
                                        None
                                    }
                                })
                                .collect(),
                        )
                    } else {
                        None
                    }
                } else {
                    None
                }
            })
            .unwrap_or_default()
    };
    // Accept common truthy spellings case-insensitively ("true", "True",
    // "TRUE", "yes", "Yes", …). Exact matching previously missed "TRUE"/"Yes".
    let blocker_str = get_scalar("blocker_discovered");
    let blocker_discovered =
        matches!(blocker_str.to_ascii_lowercase().as_str(), "true" | "yes");
    NativeSummaryFrontmatter {
        id: get_scalar("id"),
        parent: get_scalar("parent"),
        milestone: get_scalar("milestone"),
        provides: get_string_array("provides"),
        affects: get_string_array("affects"),
        key_files: get_string_array("key_files"),
        key_decisions: get_string_array("key_decisions"),
        patterns_established: get_string_array("patterns_established"),
        drill_down_paths: get_string_array("drill_down_paths"),
        observability_surfaces: get_string_array("observability_surfaces"),
        duration: get_scalar("duration"),
        verification_result: get_scalar("verification_result"),
        completed_at: get_scalar("completed_at"),
        blocker_discovered,
    }
}
/// Parses a "Files Created/Modified" section into (path, description) pairs.
///
/// Recognized bullet shapes: "- `path` — description", "* `path` - description",
/// or "- `path` description". Lines without a backtick-quoted path are skipped.
fn parse_files_modified(section: &str) -> Vec<NativeFileModified> {
    let mut files = Vec::new();
    for line in section.lines() {
        let trimmed = line.trim();
        let text = if trimmed.starts_with("- ") || trimmed.starts_with("* ") {
            &trimmed[2..]
        } else {
            continue;
        };
        // Parse `path` — description or `path` - description
        if text.starts_with('`') {
            if let Some(end_tick) = text[1..].find('`') {
                let path = text[1..1 + end_tick].to_string();
                let rest = text[1 + end_tick + 1..].trim();
                // Strip a leading em dash (—), en dash (–), or hyphen
                // separator, then trim the whitespace that followed it.
                // (The previous code compared against an empty string /
                // empty char literal — a garbled em dash — which matched
                // every line and did not compile.)
                let is_dash = |c: char| c == '\u{2014}' || c == '\u{2013}' || c == '-';
                let description = if rest.starts_with(is_dash) {
                    rest.trim_start_matches(is_dash).trim().to_string()
                } else {
                    rest.to_string()
                };
                files.push(NativeFileModified { path, description });
            }
        }
    }
    files
}
// ─── Tests ──────────────────────────────────────────────────────────────────
#[cfg(test)]

View file

@ -1,6 +1,6 @@
{
"name": "@gsd-build/engine-darwin-arm64",
"version": "2.13.1",
"version": "2.17.0",
"description": "GSD native engine binary for macOS ARM64",
"os": [
"darwin"

View file

@ -1,6 +1,6 @@
{
"name": "@gsd-build/engine-darwin-x64",
"version": "2.13.1",
"version": "2.17.0",
"description": "GSD native engine binary for macOS Intel",
"os": [
"darwin"

View file

@ -1,6 +1,6 @@
{
"name": "@gsd-build/engine-linux-arm64-gnu",
"version": "2.13.1",
"version": "2.17.0",
"description": "GSD native engine binary for Linux ARM64 (glibc)",
"os": [
"linux"

View file

@ -1,6 +1,6 @@
{
"name": "@gsd-build/engine-linux-x64-gnu",
"version": "2.13.1",
"version": "2.17.0",
"description": "GSD native engine binary for Linux x64 (glibc)",
"os": [
"linux"

View file

@ -1,6 +1,6 @@
{
"name": "@gsd-build/engine-win32-x64-msvc",
"version": "2.13.1",
"version": "2.17.0",
"description": "GSD native engine binary for Windows x64 (MSVC)",
"os": [
"win32"

4
package-lock.json generated
View file

@ -1,12 +1,12 @@
{
"name": "gsd-pi",
"version": "2.13.1",
"version": "2.16.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "gsd-pi",
"version": "2.13.1",
"version": "2.16.0",
"hasInstallScript": true,
"license": "MIT",
"workspaces": [

View file

@ -1,6 +1,6 @@
{
"name": "gsd-pi",
"version": "2.13.1",
"version": "2.17.0",
"description": "GSD — Get Shit Done coding agent",
"license": "MIT",
"repository": {
@ -61,7 +61,8 @@
"sync-pkg-version": "node scripts/sync-pkg-version.cjs",
"sync-platform-versions": "node native/scripts/sync-platform-versions.cjs",
"validate-pack": "bash scripts/validate-pack.sh",
"prepublishOnly": "npm run sync-pkg-version && npm run sync-platform-versions && git diff --exit-code || (echo 'ERROR: version sync changed files — commit them before publishing' && exit 1) && npm run build && npm run validate-pack"
"typecheck:extensions": "tsc --noEmit --project tsconfig.extensions.json",
"prepublishOnly": "npm run sync-pkg-version && npm run sync-platform-versions && git diff --exit-code || (echo 'ERROR: version sync changed files — commit them before publishing' && exit 1) && npm run build && npm run typecheck:extensions && npm run validate-pack"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",

View file

@ -13523,9 +13523,63 @@ export const MODELS = {
} satisfies Model<"anthropic-messages">,
},
"ollama-cloud": {
"llama3.1:8b": {
id: "llama3.1:8b",
name: "Llama 3.1 8B",
"cogito-2.1:671b": {
id: "cogito-2.1:671b",
name: "Cogito 2.1 671B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 163840,
maxTokens: 32000,
} satisfies Model<"openai-completions">,
"deepseek-v3.1:671b": {
id: "deepseek-v3.1:671b",
name: "DeepSeek V3.1 671B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 163840,
maxTokens: 163840,
} satisfies Model<"openai-completions">,
"deepseek-v3.2": {
id: "deepseek-v3.2",
name: "DeepSeek V3.2",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 163840,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
"devstral-2:123b": {
id: "devstral-2:123b",
name: "Devstral 2 123B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
@ -13538,48 +13592,30 @@ export const MODELS = {
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 262144,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
"llama3.1:70b": {
id: "llama3.1:70b",
name: "Llama 3.1 70B",
"devstral-small-2:24b": {
id: "devstral-small-2:24b",
name: "Devstral Small 2 24B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text"],
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 262144,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
"llama3.1:405b": {
id: "llama3.1:405b",
name: "Llama 3.1 405B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
} satisfies Model<"openai-completions">,
"qwen3:8b": {
id: "qwen3:8b",
name: "Qwen 3 8B",
"gemini-3-flash-preview": {
id: "gemini-3-flash-preview",
name: "Gemini 3 Flash Preview",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
@ -13592,62 +13628,8 @@ export const MODELS = {
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
} satisfies Model<"openai-completions">,
"qwen3:32b": {
id: "qwen3:32b",
name: "Qwen 3 32B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
} satisfies Model<"openai-completions">,
"deepseek-r1:8b": {
id: "deepseek-r1:8b",
name: "DeepSeek R1 8B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
} satisfies Model<"openai-completions">,
"deepseek-r1:70b": {
id: "deepseek-r1:70b",
name: "DeepSeek R1 70B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 1048576,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
"gemma3:12b": {
id: "gemma3:12b",
@ -13657,7 +13639,7 @@ export const MODELS = {
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text"],
input: ["text", "image"],
cost: {
input: 0,
output: 0,
@ -13665,7 +13647,7 @@ export const MODELS = {
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"gemma3:27b": {
id: "gemma3:27b",
@ -13675,7 +13657,7 @@ export const MODELS = {
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text"],
input: ["text", "image"],
cost: {
input: 0,
output: 0,
@ -13683,17 +13665,17 @@ export const MODELS = {
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"mistral:7b": {
id: "mistral:7b",
name: "Mistral 7B",
"gemma3:4b": {
id: "gemma3:4b",
name: "Gemma 3 4B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text"],
input: ["text", "image"],
cost: {
input: 0,
output: 0,
@ -13701,16 +13683,16 @@ export const MODELS = {
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"phi4:14b": {
id: "phi4:14b",
name: "Phi-4 14B",
"glm-4.6": {
id: "glm-4.6",
name: "GLM 4.6",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
@ -13718,17 +13700,17 @@ export const MODELS = {
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 202752,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"gpt-oss:20b": {
id: "gpt-oss:20b",
name: "GPT-OSS 20B",
"glm-4.7": {
id: "glm-4.7",
name: "GLM 4.7",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
@ -13736,8 +13718,26 @@ export const MODELS = {
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 202752,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"glm-5": {
id: "glm-5",
name: "GLM 5",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 202752,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"gpt-oss:120b": {
id: "gpt-oss:120b",
@ -13745,6 +13745,42 @@ export const MODELS = {
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"gpt-oss:20b": {
id: "gpt-oss:20b",
name: "GPT-OSS 20B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"kimi-k2:1t": {
id: "kimi-k2:1t",
name: "Kimi K2 1T",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text"],
@ -13754,8 +13790,332 @@ export const MODELS = {
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 262144,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
"kimi-k2.5": {
id: "kimi-k2.5",
name: "Kimi K2.5",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
"kimi-k2-thinking": {
id: "kimi-k2-thinking",
name: "Kimi K2 Thinking",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
"minimax-m2.1": {
id: "minimax-m2.1",
name: "Minimax M2.1",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 204800,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"minimax-m2.5": {
id: "minimax-m2.5",
name: "Minimax M2.5",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 204800,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"minimax-m2": {
id: "minimax-m2",
name: "Minimax M2",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 204800,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"ministral-3:14b": {
id: "ministral-3:14b",
name: "Ministral 3 14B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"ministral-3:3b": {
id: "ministral-3:3b",
name: "Ministral 3 3B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"ministral-3:8b": {
id: "ministral-3:8b",
name: "Ministral 3 8B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"mistral-large-3:675b": {
id: "mistral-large-3:675b",
name: "Mistral Large 3 675B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
"nemotron-3-nano:30b": {
id: "nemotron-3-nano:30b",
name: "Nemotron 3 Nano 30B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"nemotron-3-super": {
id: "nemotron-3-super",
name: "Nemotron 3 Super",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
"qwen3.5:397b": {
id: "qwen3.5:397b",
name: "Qwen 3.5 397B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 81920,
} satisfies Model<"openai-completions">,
"qwen3-coder:480b": {
id: "qwen3-coder:480b",
name: "Qwen 3 Coder 480B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
"qwen3-coder-next": {
id: "qwen3-coder-next",
name: "Qwen 3 Coder Next",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
"qwen3-next:80b": {
id: "qwen3-next:80b",
name: "Qwen 3 Next 80B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"qwen3-vl:235b-instruct": {
id: "qwen3-vl:235b-instruct",
name: "Qwen 3 VL 235B Instruct",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"qwen3-vl:235b": {
id: "qwen3-vl:235b",
name: "Qwen 3 VL 235B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: true,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"rnj-1:8b": {
id: "rnj-1:8b",
name: "RNJ 1 8B",
api: "openai-completions",
provider: "ollama-cloud",
baseUrl: "https://ollama.com/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 32768,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
},
} as const;

View file

@ -0,0 +1,137 @@
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import { sanitizeSchemaForGoogle } from "./google-shared.js";
// ═══════════════════════════════════════════════════════════════════════════
// sanitizeSchemaForGoogle
// ═══════════════════════════════════════════════════════════════════════════
describe("sanitizeSchemaForGoogle", () => {
  // ── pass-through: values that need no rewriting ──
  it("passes through primitives unchanged", () => {
    assert.equal(sanitizeSchemaForGoogle(null), null);
    assert.equal(sanitizeSchemaForGoogle(42), 42);
    assert.equal(sanitizeSchemaForGoogle("hello"), "hello");
    assert.equal(sanitizeSchemaForGoogle(true), true);
  });
  it("passes through a valid schema with no banned fields", () => {
    const schema = {
      type: "object",
      properties: {
        name: { type: "string" },
        age: { type: "number" },
      },
      required: ["name"],
    };
    assert.deepEqual(sanitizeSchemaForGoogle(schema), schema);
  });
  // ── patternProperties removal (keyword unsupported by Google's API) ──
  it("removes top-level patternProperties", () => {
    const schema = {
      type: "object",
      patternProperties: { "^S_": { type: "string" } },
      properties: { foo: { type: "string" } },
    };
    const result = sanitizeSchemaForGoogle(schema) as Record<string, unknown>;
    assert.ok(!("patternProperties" in result));
    // Sibling fields must survive the removal untouched.
    assert.deepEqual(result.properties, { foo: { type: "string" } });
  });
  it("removes nested patternProperties", () => {
    const schema = {
      type: "object",
      properties: {
        nested: {
          type: "object",
          patternProperties: { ".*": { type: "string" } },
        },
      },
    };
    const result = sanitizeSchemaForGoogle(schema) as any;
    assert.ok(!("patternProperties" in result.properties.nested));
  });
  // ── const -> enum conversion (Google rejects the const keyword) ──
  it("converts top-level const to enum", () => {
    const schema = { const: "fixed-value" };
    const result = sanitizeSchemaForGoogle(schema) as Record<string, unknown>;
    assert.deepEqual(result.enum, ["fixed-value"]);
    assert.ok(!("const" in result));
  });
  it("converts const to enum inside anyOf", () => {
    const schema = {
      anyOf: [{ const: "a" }, { const: "b" }, { type: "string" }],
    };
    const result = sanitizeSchemaForGoogle(schema) as any;
    assert.deepEqual(result.anyOf[0], { enum: ["a"] });
    assert.deepEqual(result.anyOf[1], { enum: ["b"] });
    // Non-const members of the union pass through unchanged.
    assert.deepEqual(result.anyOf[2], { type: "string" });
  });
  it("converts const to enum inside oneOf", () => {
    const schema = {
      oneOf: [{ const: "x" }, { const: "y" }],
    };
    const result = sanitizeSchemaForGoogle(schema) as any;
    assert.deepEqual(result.oneOf[0], { enum: ["x"] });
    assert.deepEqual(result.oneOf[1], { enum: ["y"] });
  });
  // ── recursion: sanitization applies at any depth and position ──
  it("recursively sanitizes deeply nested schemas", () => {
    const schema = {
      type: "object",
      properties: {
        level1: {
          type: "object",
          properties: {
            level2: {
              anyOf: [{ const: "deep" }, { type: "null" }],
              patternProperties: { ".*": { type: "string" } },
            },
          },
        },
      },
    };
    const result = sanitizeSchemaForGoogle(schema) as any;
    const level2 = result.properties.level1.properties.level2;
    assert.deepEqual(level2.anyOf[0], { enum: ["deep"] });
    assert.ok(!("patternProperties" in level2));
  });
  it("sanitizes items in array schemas", () => {
    const schema = {
      type: "array",
      items: {
        anyOf: [{ const: "foo" }, { type: "string" }],
      },
    };
    const result = sanitizeSchemaForGoogle(schema) as any;
    assert.deepEqual(result.items.anyOf[0], { enum: ["foo"] });
  });
  it("sanitizes arrays of schemas", () => {
    // A bare array input (e.g. an anyOf list passed directly) is mapped element-wise.
    const input = [{ const: "a" }, { const: "b" }];
    const result = sanitizeSchemaForGoogle(input) as any[];
    assert.deepEqual(result[0], { enum: ["a"] });
    assert.deepEqual(result[1], { enum: ["b"] });
  });
  it("preserves non-string const values unchanged", () => {
    // Only string const values are converted; number const is passed through
    const schema = { const: 42 };
    const result = sanitizeSchemaForGoogle(schema) as Record<string, unknown>;
    assert.equal(result.const, 42);
    assert.ok(!("enum" in result));
  });
  it("sanitizes additionalProperties", () => {
    // additionalProperties holds a sub-schema, so it must be recursed into too.
    const schema = {
      type: "object",
      additionalProperties: {
        patternProperties: { "^x-": { type: "string" } },
      },
    };
    const result = sanitizeSchemaForGoogle(schema) as any;
    assert.ok(!("patternProperties" in result.additionalProperties));
  });
});

View file

@ -226,6 +226,52 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
return contents;
}
/**
 * Sanitize a JSON Schema for Google's function declarations API.
 * Google's API rejects `patternProperties` and `const` fields which are valid in JSON Schema.
 *
 * This function recursively, at any depth of the schema:
 * - Removes all `patternProperties` fields
 * - Converts string-valued `const: "value"` to `enum: ["value"]` (non-string
 *   `const` values are passed through unchanged)
 *
 * This is needed for providers like `google-antigravity` when proxying Claude models,
 * since Google Cloud Code Assist uses a restricted subset of JSON Schema.
 */
export function sanitizeSchemaForGoogle(schema: unknown): unknown {
  // Primitives and null require no sanitization.
  if (!schema || typeof schema !== "object") {
    return schema;
  }
  // Schema lists (e.g. anyOf/oneOf members): sanitize each element.
  if (Array.isArray(schema)) {
    return schema.map((element) => sanitizeSchemaForGoogle(element));
  }
  const cleaned: Record<string, unknown> = {};
  for (const [field, raw] of Object.entries(schema as Record<string, unknown>)) {
    // Drop patternProperties entirely — not supported by Google's API.
    if (field === "patternProperties") {
      continue;
    }
    // Rewrite a string const as a single-element enum — Google rejects `const`.
    if (field === "const" && typeof raw === "string") {
      cleaned.enum = [raw];
      continue;
    }
    // Recurse into nested objects/arrays; copy scalars through unchanged.
    cleaned[field] = typeof raw === "object" ? sanitizeSchemaForGoogle(raw) : raw;
  }
  return cleaned;
}
/**
* Convert tools to Gemini function declarations format.
*
@ -233,6 +279,9 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
* anyOf, oneOf, const, etc.). Set `useParameters` to true to use the legacy `parameters`
* field instead (OpenAPI 3.0.3 Schema). This is needed for Cloud Code Assist with Claude
* models, where the API translates `parameters` into Anthropic's `input_schema`.
*
* The schema is automatically sanitized to remove fields not supported by Google's
* function declarations API (patternProperties, const converted to enum, etc.).
*/
export function convertTools(
tools: Tool[],
@ -244,7 +293,9 @@ export function convertTools(
functionDeclarations: tools.map((tool) => ({
name: tool.name,
description: tool.description,
...(useParameters ? { parameters: tool.parameters } : { parametersJsonSchema: tool.parameters }),
...(useParameters
? { parameters: sanitizeSchemaForGoogle(tool.parameters) }
: { parametersJsonSchema: sanitizeSchemaForGoogle(tool.parameters) }),
})),
},
];

View file

@ -38,6 +38,11 @@ export interface Args {
themes?: string[];
noThemes?: boolean;
listModels?: string | true;
discover?: boolean;
addProvider?: string;
addProviderBaseUrl?: string;
addProviderApiKey?: string;
discoverModels?: string | true;
offline?: boolean;
verbose?: boolean;
messages: string[];
@ -150,6 +155,18 @@ export function parseArgs(args: string[], extensionFlags?: Map<string, { type: "
} else {
result.listModels = true;
}
} else if (arg === "--discover") {
result.discover = true;
} else if (arg === "--add-provider" && i + 1 < args.length) {
result.addProvider = args[++i];
} else if (arg === "--base-url" && i + 1 < args.length) {
result.addProviderBaseUrl = args[++i];
} else if (arg === "--discover-models") {
if (i + 1 < args.length && !args[i + 1].startsWith("-") && !args[i + 1].startsWith("@")) {
result.discoverModels = args[++i];
} else {
result.discoverModels = true;
}
} else if (arg === "--verbose") {
result.verbose = true;
} else if (arg === "--offline") {
@ -219,6 +236,10 @@ ${chalk.bold("Options:")}
--no-themes Disable theme discovery and loading
--export <file> Export session file to HTML and exit
--list-models [search] List available models (with optional fuzzy search)
--discover Include discovered models in --list-models output
--discover-models [provider] Discover models from provider APIs (all or specific)
--add-provider <name> Add a provider to models.json (use with --base-url, --api-key)
--base-url <url> Base URL for --add-provider
--verbose Force verbose startup (overrides quietStartup setting)
--offline Disable startup network operations (same as PI_OFFLINE=1)
--help, -h Show this help

View file

@ -1,11 +1,18 @@
/**
* List available models with optional fuzzy search
* List available models with optional fuzzy search and discovery support
*/
import type { Api, Model } from "@gsd/pi-ai";
import { fuzzyFilter } from "@gsd/pi-tui";
import type { ModelRegistry } from "../core/model-registry.js";
/** Options accepted by `listModels` when called with an options object. */
export interface ListModelsOptions {
  /** Include discovered models in output */
  discover?: boolean;
  /** Search pattern for fuzzy filtering */
  searchPattern?: string;
}
/**
* Format a number as human-readable (e.g., 200000 -> "200K", 1000000 -> "1M")
*/
@ -22,10 +29,48 @@ function formatTokenCount(count: number): string {
}
/**
* List available models, optionally filtered by search pattern
* Discover models from provider APIs and print results.
*/
export async function listModels(modelRegistry: ModelRegistry, searchPattern?: string): Promise<void> {
const models = modelRegistry.getAvailable();
/**
 * Discover models from provider APIs and print one result line per provider:
 * either the number of models found or the discovery error.
 */
export async function discoverAndPrintModels(
  modelRegistry: ModelRegistry,
  provider?: string,
): Promise<void> {
  // A named provider narrows discovery; undefined means "all discoverable providers".
  const targets = provider === undefined ? undefined : [provider];
  console.log("Discovering models...");
  const outcomes = await modelRegistry.discoverModels(targets);
  for (const result of outcomes) {
    console.log(
      result.error
        ? ` ${result.provider}: error - ${result.error}`
        : ` ${result.provider}: ${result.models.length} models found`,
    );
  }
}
/**
* List available models, optionally filtered by search pattern.
* Accepts either a string (backward compat) or ListModelsOptions.
*/
export async function listModels(
modelRegistry: ModelRegistry,
optionsOrSearch?: string | ListModelsOptions,
): Promise<void> {
const options: ListModelsOptions =
typeof optionsOrSearch === "string"
? { searchPattern: optionsOrSearch }
: optionsOrSearch ?? {};
// If discover flag is set, run discovery first
if (options.discover) {
await modelRegistry.discoverModels();
}
// Get models — include discovered if discovery was run
const models = options.discover
? modelRegistry.getAllWithDiscovered()
: modelRegistry.getAvailable();
if (models.length === 0) {
console.log("No models available. Set API keys in environment variables.");
@ -34,12 +79,12 @@ export async function listModels(modelRegistry: ModelRegistry, searchPattern?: s
// Apply fuzzy filter if search pattern provided
let filteredModels: Model<Api>[] = models;
if (searchPattern) {
filteredModels = fuzzyFilter(models, searchPattern, (m) => `${m.provider} ${m.id}`);
if (options.searchPattern) {
filteredModels = fuzzyFilter(models, options.searchPattern, (m) => `${m.provider} ${m.id}`);
}
if (filteredModels.length === 0) {
console.log(`No models matching "${searchPattern}"`);
console.log(`No models matching "${options.searchPattern}"`);
return;
}
@ -53,15 +98,19 @@ export async function listModels(modelRegistry: ModelRegistry, searchPattern?: s
});
// Calculate column widths
const rows = filteredModels.map((m) => ({
provider: m.provider,
model: m.id,
name: m.name,
context: formatTokenCount(m.contextWindow),
maxOut: formatTokenCount(m.maxTokens),
thinking: m.reasoning ? "yes" : "no",
images: m.input.includes("image") ? "yes" : "no",
}));
const rows = filteredModels.map((m) => {
const isDiscovered = options.discover && modelRegistry.isDiscovered(m);
return {
provider: m.provider,
model: m.id,
name: m.name,
context: formatTokenCount(m.contextWindow),
maxOut: formatTokenCount(m.maxTokens),
thinking: m.reasoning ? "yes" : "no",
images: m.input.includes("image") ? "yes" : "no",
badge: isDiscovered ? "[discovered]" : "",
};
});
const headers = {
provider: "provider",
@ -71,6 +120,7 @@ export async function listModels(modelRegistry: ModelRegistry, searchPattern?: s
maxOut: "max-out",
thinking: "thinking",
images: "images",
badge: "",
};
const widths = {
@ -105,7 +155,10 @@ export async function listModels(modelRegistry: ModelRegistry, searchPattern?: s
row.maxOut.padEnd(widths.maxOut),
row.thinking.padEnd(widths.thinking),
row.images.padEnd(widths.images),
].join(" ");
row.badge,
]
.join(" ")
.trimEnd();
console.log(line);
}
}

View file

@ -77,29 +77,33 @@ export function getUpdateInstruction(packageName: string): string {
* - For Node.js (dist/): returns __dirname (the dist/ directory)
* - For tsx (src/): returns parent directory (the package root)
*/
let _cachedPackageDir: string | undefined;

export function getPackageDir(): string {
  // Memoize: the package directory cannot change during the process lifetime.
  if (_cachedPackageDir === undefined) {
    _cachedPackageDir = resolvePackageDirUncached();
  }
  return _cachedPackageDir;
}

/** Compute the package directory without consulting the memoized value. */
function resolvePackageDirUncached(): string {
  // Allow override via environment variable (useful for Nix/Guix where store paths tokenize poorly)
  const envDir = process.env.PI_PACKAGE_DIR;
  if (envDir) {
    if (envDir === "~") return homedir();
    if (envDir.startsWith("~/")) return homedir() + envDir.slice(1);
    return envDir;
  }
  if (isBunBinary) {
    // Bun binary: process.execPath points to the compiled executable
    return dirname(process.execPath);
  }
  // Node.js: walk up from __dirname until we find package.json
  let current = __dirname;
  while (current !== dirname(current)) {
    if (existsSync(join(current, "package.json"))) {
      return current;
    }
    current = dirname(current);
  }
  // Fallback (shouldn't happen)
  return __dirname;
}
/**

View file

@ -1354,6 +1354,9 @@ export class AgentSession {
this._disconnectFromAgent();
await this.abort();
this.agent.reset();
// Update cwd to current process directory — auto-mode may have chdir'd
// into a worktree since the original session was created.
this._cwd = process.cwd();
this.sessionManager.newSession({ parentSession: options?.parentSession });
this.agent.sessionId = this.sessionManager.getSessionId();
this._steeringMessages = [];

View file

@ -0,0 +1,170 @@
import assert from "node:assert/strict";
import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, it } from "node:test";
import { ModelDiscoveryCache } from "./discovery-cache.js";
// Per-test temp directory so cache files never collide across tests or runs.
let testDir: string;
let cachePath: string;
beforeEach(() => {
  testDir = join(tmpdir(), `discovery-cache-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
  mkdirSync(testDir, { recursive: true });
  cachePath = join(testDir, "discovery-cache.json");
});
afterEach(() => {
  try {
    rmSync(testDir, { recursive: true, force: true });
  } catch {
    // Cleanup best-effort
  }
});
// ─── basic operations ────────────────────────────────────────────────────────
describe("ModelDiscoveryCache — basic operations", () => {
  it("starts with no entries", () => {
    const cache = new ModelDiscoveryCache(cachePath);
    assert.equal(cache.get("openai"), undefined);
  });
  it("stores and retrieves models", () => {
    const cache = new ModelDiscoveryCache(cachePath);
    const models = [{ id: "gpt-4o", name: "GPT-4o" }];
    cache.set("openai", models);
    const entry = cache.get("openai");
    assert.ok(entry);
    assert.deepEqual(entry.models, models);
    // set() stamps the entry with a fetch time and a positive TTL.
    assert.ok(entry.fetchedAt > 0);
    assert.ok(entry.ttlMs > 0);
  });
  it("persists to disk and reloads", () => {
    // A second instance pointed at the same path must see the first's writes.
    const cache1 = new ModelDiscoveryCache(cachePath);
    cache1.set("openai", [{ id: "gpt-4o" }]);
    const cache2 = new ModelDiscoveryCache(cachePath);
    const entry = cache2.get("openai");
    assert.ok(entry);
    assert.equal(entry.models[0].id, "gpt-4o");
  });
  it("clear removes a specific provider", () => {
    const cache = new ModelDiscoveryCache(cachePath);
    cache.set("openai", [{ id: "gpt-4o" }]);
    cache.set("google", [{ id: "gemini-pro" }]);
    cache.clear("openai");
    assert.equal(cache.get("openai"), undefined);
    // Other providers are untouched by a targeted clear.
    assert.ok(cache.get("google"));
  });
  it("clear without provider removes all entries", () => {
    const cache = new ModelDiscoveryCache(cachePath);
    cache.set("openai", [{ id: "gpt-4o" }]);
    cache.set("google", [{ id: "gemini-pro" }]);
    cache.clear();
    assert.equal(cache.get("openai"), undefined);
    assert.equal(cache.get("google"), undefined);
  });
});
// ─── staleness ───────────────────────────────────────────────────────────────
describe("ModelDiscoveryCache — staleness", () => {
  it("newly set entries are not stale", () => {
    const cache = new ModelDiscoveryCache(cachePath);
    cache.set("openai", [{ id: "gpt-4o" }]);
    assert.equal(cache.isStale("openai"), false);
  });
  it("missing providers are stale", () => {
    const cache = new ModelDiscoveryCache(cachePath);
    assert.equal(cache.isStale("unknown"), true);
  });
  it("entries with expired TTL are stale", () => {
    const cache = new ModelDiscoveryCache(cachePath);
    cache.set("openai", [{ id: "gpt-4o" }], 1); // 1ms TTL
    // Wait for TTL to expire
    // (synchronous busy-wait: these tests are sync and the wait is ~5ms)
    const start = Date.now();
    while (Date.now() - start < 5) {
      // busy wait
    }
    assert.equal(cache.isStale("openai"), true);
  });
});
// ─── getAll ──────────────────────────────────────────────────────────────────
describe("ModelDiscoveryCache — getAll", () => {
  it("returns non-stale entries by default", () => {
    const cache = new ModelDiscoveryCache(cachePath);
    cache.set("openai", [{ id: "gpt-4o" }]);
    cache.set("stale", [{ id: "old" }], 1);
    // Wait for stale TTL
    const start = Date.now();
    while (Date.now() - start < 5) {
      // busy wait
    }
    const all = cache.getAll();
    assert.ok(all.has("openai"));
    assert.ok(!all.has("stale"));
  });
  it("returns all entries when includeStale is true", () => {
    const cache = new ModelDiscoveryCache(cachePath);
    cache.set("openai", [{ id: "gpt-4o" }]);
    cache.set("stale", [{ id: "old" }], 1);
    // Wait for stale TTL
    const start = Date.now();
    while (Date.now() - start < 5) {
      // busy wait
    }
    const all = cache.getAll(true);
    assert.ok(all.has("openai"));
    assert.ok(all.has("stale"));
  });
});
// ─── edge cases ──────────────────────────────────────────────────────────────
describe("ModelDiscoveryCache — edge cases", () => {
  it("handles corrupted cache file gracefully", () => {
    // Invalid JSON on disk must not throw — the cache just starts empty.
    writeFileSync(cachePath, "not valid json", "utf-8");
    const cache = new ModelDiscoveryCache(cachePath);
    assert.equal(cache.get("openai"), undefined);
  });
  it("handles wrong version gracefully", () => {
    // Unknown schema versions are discarded on load.
    writeFileSync(cachePath, JSON.stringify({ version: 99, entries: {} }), "utf-8");
    const cache = new ModelDiscoveryCache(cachePath);
    assert.equal(cache.get("openai"), undefined);
  });
  it("handles missing cache file", () => {
    const cache = new ModelDiscoveryCache(join(testDir, "nonexistent", "cache.json"));
    assert.equal(cache.get("openai"), undefined);
  });
  it("overwrites existing entry for same provider", () => {
    const cache = new ModelDiscoveryCache(cachePath);
    cache.set("openai", [{ id: "gpt-4o" }]);
    cache.set("openai", [{ id: "gpt-4o-mini" }]);
    const entry = cache.get("openai");
    assert.ok(entry);
    // The second set replaces (not appends to) the first.
    assert.equal(entry.models.length, 1);
    assert.equal(entry.models[0].id, "gpt-4o-mini");
  });
});

View file

@ -0,0 +1,97 @@
/**
* Disk-based cache for discovered models.
* Stores results at {agentDir}/discovery-cache.json with per-provider TTLs.
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
import { dirname, join } from "path";
import { getAgentDir } from "../config.js";
import { type DiscoveredModel, getDefaultTTL } from "./model-discovery.js";
/** One provider's cached discovery result. */
export interface DiscoveryCacheEntry {
  // Models returned by the provider's discovery endpoint.
  models: DiscoveredModel[];
  // Epoch milliseconds at which the models were fetched.
  fetchedAt: number;
  // Freshness window in ms; past this age the entry is considered stale.
  ttlMs: number;
}
/** On-disk shape of discovery-cache.json. */
export interface DiscoveryCacheData {
  // Schema version; entries with any other version are discarded on load.
  version: 1;
  // Cache entries keyed by provider name.
  entries: Record<string, DiscoveryCacheEntry>;
}
export class ModelDiscoveryCache {
  // In-memory view of the cache file; re-persisted on every mutation.
  private data: DiscoveryCacheData;
  private cachePath: string;

  constructor(cachePath?: string) {
    this.cachePath = cachePath ?? join(getAgentDir(), "discovery-cache.json");
    this.data = { version: 1, entries: {} };
    this.load();
  }

  /** Return the cached entry for a provider (stale or not), or undefined. */
  get(provider: string): DiscoveryCacheEntry | undefined {
    return this.data.entries[provider];
  }

  /** Store a provider's models (with fetch timestamp and TTL) and persist. */
  set(provider: string, models: DiscoveredModel[], ttlMs?: number): void {
    const entry: DiscoveryCacheEntry = {
      models,
      fetchedAt: Date.now(),
      ttlMs: ttlMs ?? getDefaultTTL(provider),
    };
    this.data.entries[provider] = entry;
    this.save();
  }

  /** True when the provider was never fetched or its TTL has elapsed. */
  isStale(provider: string): boolean {
    const entry = this.data.entries[provider];
    return entry === undefined || Date.now() - entry.fetchedAt > entry.ttlMs;
  }

  /** Remove one provider's entry, or every entry when none is given; persist. */
  clear(provider?: string): void {
    if (provider) {
      delete this.data.entries[provider];
    } else {
      this.data.entries = {};
    }
    this.save();
  }

  /** Provider -> entry map, excluding stale entries unless includeStale is true. */
  getAll(includeStale = false): Map<string, DiscoveryCacheEntry> {
    const kept = Object.entries(this.data.entries).filter(
      ([provider]) => includeStale || !this.isStale(provider),
    );
    return new Map(kept);
  }

  /** Read the cache file; missing, corrupt, or wrong-version files are ignored. */
  load(): void {
    try {
      if (!existsSync(this.cachePath)) return;
      const parsed = JSON.parse(readFileSync(this.cachePath, "utf-8")) as DiscoveryCacheData;
      if (parsed.version === 1 && parsed.entries) {
        this.data = parsed;
      }
    } catch {
      // Corrupted or unreadable cache — start fresh
      this.data = { version: 1, entries: {} };
    }
  }

  /** Persist the cache, creating the parent directory if needed. */
  save(): void {
    try {
      const parentDir = dirname(this.cachePath);
      if (!existsSync(parentDir)) {
        mkdirSync(parentDir, { recursive: true });
      }
      writeFileSync(this.cachePath, JSON.stringify(this.data, null, 2), "utf-8");
    } catch {
      // Silently ignore write failures (read-only FS, permissions, etc.)
    }
  }
}

View file

@ -117,7 +117,7 @@ export interface ExtensionUIContext {
input(title: string, placeholder?: string, opts?: ExtensionUIDialogOptions): Promise<string | undefined>;
/** Show a notification to the user. */
notify(message: string, type?: "info" | "warning" | "error"): void;
notify(message: string, type?: "info" | "warning" | "error" | "success"): void;
/** Listen to raw terminal input (interactive mode only). Returns an unsubscribe function. */
onTerminalInput(handler: TerminalInputHandler): () => void;

View file

@ -0,0 +1,125 @@
import assert from "node:assert/strict";
import { describe, it } from "node:test";
import {
DISCOVERY_TTLS,
getDefaultTTL,
getDiscoverableProviders,
getDiscoveryAdapter,
} from "./model-discovery.js";
// ─── getDiscoveryAdapter ─────────────────────────────────────────────────────
describe("getDiscoveryAdapter", () => {
  // Providers backed by a live discovery endpoint.
  for (const provider of ["openai", "ollama", "openrouter", "google"]) {
    it(`returns an adapter for ${provider}`, () => {
      const adapter = getDiscoveryAdapter(provider);
      assert.equal(adapter.provider, provider);
      assert.equal(adapter.supportsDiscovery, true);
    });
  }
  // Providers served by a static (no-op) adapter.
  for (const provider of ["anthropic", "bedrock"]) {
    it(`returns a static adapter for ${provider}`, () => {
      const adapter = getDiscoveryAdapter(provider);
      assert.equal(adapter.provider, provider);
      assert.equal(adapter.supportsDiscovery, false);
    });
  }
  it("returns a static adapter for unknown providers", () => {
    const adapter = getDiscoveryAdapter("unknown-provider");
    assert.equal(adapter.provider, "unknown-provider");
    assert.equal(adapter.supportsDiscovery, false);
  });
  it("static adapter fetchModels returns empty array", async () => {
    const models = await getDiscoveryAdapter("anthropic").fetchModels("key");
    assert.deepEqual(models, []);
  });
});
// ─── getDiscoverableProviders ────────────────────────────────────────────────
describe("getDiscoverableProviders", () => {
  it("returns only providers that support discovery", () => {
    const providers = getDiscoverableProviders();
    for (const name of ["openai", "ollama", "openrouter", "google"]) {
      assert.ok(providers.includes(name));
    }
    for (const name of ["anthropic", "bedrock"]) {
      assert.ok(!providers.includes(name));
    }
  });
  it("returns an array of strings", () => {
    const providers = getDiscoverableProviders();
    assert.ok(Array.isArray(providers));
    for (const p of providers) {
      assert.equal(typeof p, "string");
    }
  });
});
// ─── getDefaultTTL ───────────────────────────────────────────────────────────
describe("getDefaultTTL", () => {
  const MINUTE = 60 * 1000;
  const HOUR = 60 * MINUTE;
  it("returns 5 minutes for ollama", () => {
    assert.equal(getDefaultTTL("ollama"), 5 * MINUTE);
  });
  it("returns 1 hour for openai", () => {
    assert.equal(getDefaultTTL("openai"), HOUR);
  });
  it("returns 1 hour for google", () => {
    assert.equal(getDefaultTTL("google"), HOUR);
  });
  it("returns 1 hour for openrouter", () => {
    assert.equal(getDefaultTTL("openrouter"), HOUR);
  });
  it("returns 24 hours for unknown providers", () => {
    assert.equal(getDefaultTTL("some-custom"), 24 * HOUR);
  });
});
// ─── DISCOVERY_TTLS ──────────────────────────────────────────────────────────
describe("DISCOVERY_TTLS", () => {
  it("has expected keys", () => {
    for (const key of ["ollama", "openai", "google", "openrouter", "default"]) {
      assert.ok(key in DISCOVERY_TTLS);
    }
  });
  it("all values are positive numbers", () => {
    for (const value of Object.values(DISCOVERY_TTLS)) {
      assert.equal(typeof value, "number");
      assert.ok(value > 0);
    }
  });
});

View file

@ -0,0 +1,231 @@
/**
* Provider discovery adapters for runtime model enumeration.
* Each adapter implements ProviderDiscoveryAdapter to fetch models from provider APIs.
*/
/** A model reported by a provider's discovery endpoint; everything beyond `id` is optional. */
export interface DiscoveredModel {
  id: string;
  name?: string; // display name; callers fall back to id when absent
  contextWindow?: number; // input context size in tokens
  maxTokens?: number; // maximum completion tokens
  reasoning?: boolean; // whether the model exposes reasoning output
  input?: ("text" | "image")[]; // supported input modalities
  cost?: { input: number; output: number; cacheRead: number; cacheWrite: number }; // price per million tokens (see OpenRouter adapter conversion)
}
/** Outcome of one provider's discovery attempt; when `error` is set, `models` is empty. */
export interface DiscoveryResult {
  provider: string;
  models: DiscoveredModel[];
  fetchedAt: number; // epoch milliseconds of the fetch
  error?: string;
}
/** Contract every provider adapter implements to enumerate models at runtime. */
export interface ProviderDiscoveryAdapter {
  provider: string;
  supportsDiscovery: boolean; // false for static adapters, which resolve to []
  fetchModels(apiKey: string, baseUrl?: string): Promise<DiscoveredModel[]>;
}
/**
 * Default cache lifetimes per provider, in milliseconds.
 * Local providers (ollama) refresh frequently; hosted catalogs change rarely.
 */
export const DISCOVERY_TTLS: Record<string, number> = {
  ollama: 5 * 60 * 1000, // 5 minutes (local, models change often)
  openai: 60 * 60 * 1000, // 1 hour
  google: 60 * 60 * 1000, // 1 hour
  openrouter: 60 * 60 * 1000, // 1 hour
  default: 24 * 60 * 60 * 1000, // 24 hours
};

/** Resolve the TTL for a provider, falling back to the "default" entry. */
export function getDefaultTTL(provider: string): number {
  const ttl = DISCOVERY_TTLS[provider];
  return ttl ?? DISCOVERY_TTLS.default;
}
/**
 * fetch() wrapper that aborts the request after `timeoutMs` milliseconds.
 * The timer is cleared in all cases, whether the request resolves or rejects.
 */
async function fetchWithTimeout(url: string, options: RequestInit = {}, timeoutMs = 5000): Promise<Response> {
  const controller = new AbortController();
  const timer = setTimeout(() => {
    controller.abort();
  }, timeoutMs);
  try {
    const response = await fetch(url, { ...options, signal: controller.signal });
    return response;
  } finally {
    clearTimeout(timer);
  }
}
// ─── OpenAI Adapter ──────────────────────────────────────────────────────────
// Markers identifying non-chat models (embeddings, audio, image, legacy
// completion models). Matched with String#includes: real OpenAI ids such as
// "text-davinci-003" or "text-embedding-3-small" carry these markers in the
// middle of the id, so the previous startsWith() check silently let them
// through ("embedding" also subsumes the old separate "text-embedding" entry).
const OPENAI_EXCLUDED_MARKERS = ["embedding", "tts", "dall-e", "whisper", "davinci", "babbage"];
/** Lists chat-capable models via the OpenAI `/v1/models` endpoint. */
class OpenAIDiscoveryAdapter implements ProviderDiscoveryAdapter {
  provider = "openai";
  supportsDiscovery = true;
  /**
   * Fetch the model catalog and filter out non-chat models.
   * @param apiKey bearer token for the OpenAI API
   * @param baseUrl optional override (defaults to https://api.openai.com)
   * @throws Error when the API responds with a non-2xx status
   */
  async fetchModels(apiKey: string, baseUrl?: string): Promise<DiscoveredModel[]> {
    const url = `${baseUrl ?? "https://api.openai.com"}/v1/models`;
    const response = await fetchWithTimeout(url, {
      headers: { Authorization: `Bearer ${apiKey}` },
    });
    if (!response.ok) {
      throw new Error(`OpenAI models API returned ${response.status}: ${response.statusText}`);
    }
    const data = (await response.json()) as { data: Array<{ id: string; owned_by?: string }> };
    return data.data
      .filter((m) => !OPENAI_EXCLUDED_MARKERS.some((marker) => m.id.includes(marker)))
      .map((m) => ({
        id: m.id,
        name: m.id,
        input: ["text" as const, "image" as const],
      }));
  }
}
// ─── Ollama Adapter ──────────────────────────────────────────────────────────
/** Lists locally installed models via the Ollama `/api/tags` endpoint (no auth needed). */
class OllamaDiscoveryAdapter implements ProviderDiscoveryAdapter {
  provider = "ollama";
  supportsDiscovery = true;
  async fetchModels(_apiKey: string, baseUrl?: string): Promise<DiscoveredModel[]> {
    const endpoint = `${baseUrl ?? "http://localhost:11434"}/api/tags`;
    const response = await fetchWithTimeout(endpoint);
    if (!response.ok) {
      throw new Error(`Ollama tags API returned ${response.status}: ${response.statusText}`);
    }
    const payload = (await response.json()) as {
      models: Array<{ name: string; size: number; details?: { parameter_size?: string } }>;
    };
    const entries = payload.models ?? [];
    return entries.map((entry) => ({
      id: entry.name,
      name: entry.name,
      input: ["text" as const],
    }));
  }
}
// ─── OpenRouter Adapter ──────────────────────────────────────────────────────
/** Lists models via the OpenRouter `/api/v1/models` endpoint, including pricing metadata. */
class OpenRouterDiscoveryAdapter implements ProviderDiscoveryAdapter {
  provider = "openrouter";
  supportsDiscovery = true;
  /**
   * Fetch the OpenRouter catalog.
   * @param apiKey bearer token for the OpenRouter API
   * @param baseUrl optional override (defaults to https://openrouter.ai)
   * @throws Error when the API responds with a non-2xx status
   */
  async fetchModels(apiKey: string, baseUrl?: string): Promise<DiscoveredModel[]> {
    const url = `${baseUrl ?? "https://openrouter.ai"}/api/v1/models`;
    const response = await fetchWithTimeout(url, {
      headers: { Authorization: `Bearer ${apiKey}` },
    });
    if (!response.ok) {
      throw new Error(`OpenRouter models API returned ${response.status}: ${response.statusText}`);
    }
    const data = (await response.json()) as {
      data: Array<{
        id: string;
        name: string;
        context_length?: number;
        top_provider?: { max_completion_tokens?: number };
        pricing?: { prompt: string; completion: string };
      }>;
    };
    return (data.data ?? []).map((m) => {
      // Pricing arrives as per-token strings; convert to price per million tokens.
      // Guard against non-numeric strings: parseFloat would yield NaN, and a NaN
      // cost would silently poison downstream cost accounting.
      const promptPrice = m.pricing?.prompt !== undefined ? parseFloat(m.pricing.prompt) : NaN;
      const completionPrice = m.pricing?.completion !== undefined ? parseFloat(m.pricing.completion) : NaN;
      const cost =
        Number.isFinite(promptPrice) && Number.isFinite(completionPrice)
          ? {
              input: promptPrice * 1_000_000,
              output: completionPrice * 1_000_000,
              cacheRead: 0,
              cacheWrite: 0,
            }
          : undefined;
      return {
        id: m.id,
        name: m.name,
        contextWindow: m.context_length,
        maxTokens: m.top_provider?.max_completion_tokens,
        cost,
        input: ["text" as const, "image" as const],
      };
    });
  }
}
// ─── Google/Gemini Adapter ───────────────────────────────────────────────────
/** Lists generateContent-capable models via the Gemini `/v1beta/models` endpoint. */
class GoogleDiscoveryAdapter implements ProviderDiscoveryAdapter {
  provider = "google";
  supportsDiscovery = true;
  /**
   * Fetch the Gemini model catalog.
   * @param apiKey Google AI API key (sent as a query parameter per the API's auth scheme)
   * @param baseUrl optional override (defaults to https://generativelanguage.googleapis.com)
   * @throws Error when the API responds with a non-2xx status
   */
  async fetchModels(apiKey: string, baseUrl?: string): Promise<DiscoveredModel[]> {
    // URL-encode the key: keys containing reserved characters would otherwise
    // corrupt the query string.
    const url = `${baseUrl ?? "https://generativelanguage.googleapis.com"}/v1beta/models?key=${encodeURIComponent(apiKey)}`;
    const response = await fetchWithTimeout(url);
    if (!response.ok) {
      throw new Error(`Google models API returned ${response.status}: ${response.statusText}`);
    }
    const data = (await response.json()) as {
      models: Array<{
        name: string;
        displayName: string;
        supportedGenerationMethods?: string[];
        inputTokenLimit?: number;
        outputTokenLimit?: number;
      }>;
    };
    // Only chat-capable models; strip the "models/" resource prefix from ids.
    return (data.models ?? [])
      .filter((m) => m.supportedGenerationMethods?.includes("generateContent"))
      .map((m) => ({
        id: m.name.replace("models/", ""),
        name: m.displayName,
        contextWindow: m.inputTokenLimit,
        maxTokens: m.outputTokenLimit,
        input: ["text" as const, "image" as const],
      }));
  }
}
// ─── Static Adapter (no discovery) ───────────────────────────────────────────
/** Placeholder adapter for providers without a model-listing API; always yields []. */
class StaticDiscoveryAdapter implements ProviderDiscoveryAdapter {
  provider: string;
  supportsDiscovery = false;
  constructor(provider: string) {
    this.provider = provider;
  }
  async fetchModels(): Promise<DiscoveredModel[]> {
    const none: DiscoveredModel[] = [];
    return none;
  }
}
// ─── Registry ────────────────────────────────────────────────────────────────
// Dynamic adapters first, then static placeholders; insertion order is the
// order getDiscoverableProviders() reports.
const adapters: Record<string, ProviderDiscoveryAdapter> = {
  openai: new OpenAIDiscoveryAdapter(),
  ollama: new OllamaDiscoveryAdapter(),
  openrouter: new OpenRouterDiscoveryAdapter(),
  google: new GoogleDiscoveryAdapter(),
};
for (const name of ["anthropic", "bedrock", "azure-openai", "groq", "cerebras", "xai", "mistral"]) {
  adapters[name] = new StaticDiscoveryAdapter(name);
}
/** Look up the adapter for a provider; unknown providers get a fresh static adapter. */
export function getDiscoveryAdapter(provider: string): ProviderDiscoveryAdapter {
  const registered = adapters[provider];
  return registered ?? new StaticDiscoveryAdapter(provider);
}
/** Names of all providers whose adapter supports live discovery. */
export function getDiscoverableProviders(): string[] {
  const names: string[] = [];
  for (const [name, adapter] of Object.entries(adapters)) {
    if (adapter.supportsDiscovery) {
      names.push(name);
    }
  }
  return names;
}

View file

@ -0,0 +1,135 @@
import assert from "node:assert/strict";
import { mkdirSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, it } from "node:test";
import { AuthStorage } from "./auth-storage.js";
import { ModelDiscoveryCache } from "./discovery-cache.js";
import { getDefaultTTL, getDiscoverableProviders, getDiscoveryAdapter } from "./model-discovery.js";
// Fresh scratch directory per test; removed afterwards on a best-effort basis.
let testDir: string;
beforeEach(() => {
  const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
  testDir = join(tmpdir(), `model-registry-discovery-test-${suffix}`);
  mkdirSync(testDir, { recursive: true });
});
afterEach(() => {
  try {
    rmSync(testDir, { recursive: true, force: true });
  } catch {
    // Best-effort cleanup; leftover temp dirs are harmless.
  }
});
// ─── discovery cache integration ─────────────────────────────────────────────
describe("ModelDiscoveryCache — integration with discovery", () => {
  it("cache respects provider-specific TTLs", () => {
    const cache = new ModelDiscoveryCache(join(testDir, "cache.json"));
    cache.set("ollama", [{ id: "llama2" }]);
    const entry = cache.get("ollama");
    assert.ok(entry);
    assert.equal(entry.ttlMs, getDefaultTTL("ollama"));
  });
  it("cache uses custom TTL when provided", () => {
    const cache = new ModelDiscoveryCache(join(testDir, "cache.json"));
    cache.set("openai", [{ id: "gpt-4o" }], 999);
    const entry = cache.get("openai");
    assert.ok(entry);
    assert.equal(entry.ttlMs, 999);
  });
});
// ─── adapter resolution ─────────────────────────────────────────────────────
describe("Discovery adapter resolution", () => {
  it("all discoverable providers have adapters", () => {
    for (const provider of getDiscoverableProviders()) {
      const adapter = getDiscoveryAdapter(provider);
      assert.equal(adapter.supportsDiscovery, true, `${provider} should support discovery`);
    }
  });
  it("static adapters return empty model lists", async () => {
    for (const provider of ["anthropic", "bedrock", "azure-openai", "groq", "cerebras"]) {
      const adapter = getDiscoveryAdapter(provider);
      assert.equal(adapter.supportsDiscovery, false, `${provider} should not support discovery`);
      const models = await adapter.fetchModels("dummy-key");
      assert.deepEqual(models, [], `${provider} should return empty models`);
    }
  });
});
// ─── AuthStorage hasAuth for discovery ───────────────────────────────────────
describe("AuthStorage — hasAuth for discovery providers", () => {
  it("returns false for providers without auth", () => {
    const storage = AuthStorage.inMemory({});
    assert.equal(storage.hasAuth("openai"), false);
    assert.equal(storage.hasAuth("ollama"), false);
  });
  it("returns true for providers with stored keys", () => {
    const credentials = { openai: { type: "api_key" as const, key: "sk-test" } };
    const storage = AuthStorage.inMemory(credentials);
    assert.equal(storage.hasAuth("openai"), true);
    assert.equal(storage.hasAuth("ollama"), false);
  });
});
// ─── cache persistence across instances ──────────────────────────────────────
describe("ModelDiscoveryCache — persistence", () => {
  it("data survives across cache instances", () => {
    const cachePath = join(testDir, "persist.json");
    const writer = new ModelDiscoveryCache(cachePath);
    const models = [
      { id: "gpt-4o", name: "GPT-4o", contextWindow: 128000 },
      { id: "gpt-4o-mini", name: "GPT-4o Mini" },
    ];
    writer.set("openai", models);
    // A brand-new instance must read back what the first one wrote.
    const reader = new ModelDiscoveryCache(cachePath);
    const entry = reader.get("openai");
    assert.ok(entry);
    assert.equal(entry.models.length, 2);
    assert.equal(entry.models[0].contextWindow, 128000);
  });
  it("clear persists across instances", () => {
    const cachePath = join(testDir, "clear.json");
    const writer = new ModelDiscoveryCache(cachePath);
    writer.set("openai", [{ id: "gpt-4o" }]);
    writer.clear("openai");
    const reader = new ModelDiscoveryCache(cachePath);
    assert.equal(reader.get("openai"), undefined);
  });
});
// ─── discovery TTL values ────────────────────────────────────────────────────
describe("Discovery TTL configuration", () => {
  it("ollama has shortest TTL (local models change often)", () => {
    const local = getDefaultTTL("ollama");
    const hosted = getDefaultTTL("openai");
    assert.ok(local < hosted, "ollama TTL should be shorter than openai");
  });
  it("unknown providers get default TTL", () => {
    // Unknown providers should get the same TTL as the explicit "default" key
    assert.equal(getDefaultTTL("my-custom-provider"), getDefaultTTL("default"));
  });
});

View file

@ -24,6 +24,9 @@ import { existsSync, readFileSync } from "fs";
import { join } from "path";
import { getAgentDir } from "../config.js";
import type { AuthStorage } from "./auth-storage.js";
import { ModelDiscoveryCache } from "./discovery-cache.js";
import type { DiscoveredModel, DiscoveryResult } from "./model-discovery.js";
import { getDefaultTTL, getDiscoverableProviders, getDiscoveryAdapter } from "./model-discovery.js";
import { clearConfigValueCache, resolveConfigValue, resolveHeaders } from "./resolve-config-value.js";
const Ajv = (AjvModule as any).default || AjvModule;
@ -221,6 +224,8 @@ export const clearApiKeyCache = clearConfigValueCache;
*/
export class ModelRegistry {
private models: Model<Api>[] = [];
private discoveredModels: Model<Api>[] = [];
private discoveryCache: ModelDiscoveryCache;
private customProviderApiKeys: Map<string, string> = new Map();
private registeredProviders: Map<string, ProviderConfigInput> = new Map();
private loadError: string | undefined = undefined;
@ -229,6 +234,8 @@ export class ModelRegistry {
readonly authStorage: AuthStorage,
private modelsJsonPath: string | undefined = join(getAgentDir(), "models.json"),
) {
this.discoveryCache = new ModelDiscoveryCache();
// Set up fallback resolver for custom provider API keys
this.authStorage.setFallbackResolver((provider) => {
const keyConfig = this.customProviderApiKeys.get(provider);
@ -666,6 +673,106 @@ export class ModelRegistry {
});
}
}
/**
 * Discover models from all providers that support discovery.
 * Fresh cache entries are served without a fetch; stale providers are
 * fetched concurrently. Results are cached and merged into the registry
 * (discovered models never override existing ones).
 *
 * @param providers optional subset of providers to query; defaults to all discoverable ones
 * @returns one DiscoveryResult per queried provider (skipped providers omitted),
 *   with per-provider errors captured in `result.error` rather than thrown
 */
async discoverModels(providers?: string[]): Promise<DiscoveryResult[]> {
  const targetProviders = providers ?? getDiscoverableProviders();
  // Each provider's lookup is independent network I/O, so run them in
  // parallel rather than awaiting each one serially; Promise.all preserves
  // the input order, so result ordering is unchanged.
  const outcomes = await Promise.all(
    targetProviders.map(async (providerName): Promise<DiscoveryResult | undefined> => {
      const adapter = getDiscoveryAdapter(providerName);
      if (!adapter.supportsDiscovery) return undefined;
      // Serve from cache while the entry is still fresh.
      if (!this.discoveryCache.isStale(providerName)) {
        const cached = this.discoveryCache.get(providerName);
        if (cached) {
          return {
            provider: providerName,
            models: cached.models,
            fetchedAt: cached.fetchedAt,
          };
        }
      }
      try {
        const apiKey = await this.authStorage.getApiKey(providerName);
        // Ollama is local and keyless; every other provider needs a key.
        if (!apiKey && providerName !== "ollama") return undefined;
        const models = await adapter.fetchModels(apiKey ?? "", undefined);
        this.discoveryCache.set(providerName, models);
        return {
          provider: providerName,
          models,
          fetchedAt: Date.now(),
        };
      } catch (error) {
        // A failing provider must not abort discovery for the others.
        return {
          provider: providerName,
          models: [],
          fetchedAt: Date.now(),
          error: error instanceof Error ? error.message : String(error),
        };
      }
    }),
  );
  const results = outcomes.filter((r): r is DiscoveryResult => r !== undefined);
  // Convert and merge discovered models
  this.discoveredModels = this.convertDiscoveredModels(results);
  return results;
}
/**
 * Get all models including discovered ones.
 * Discovered models are appended but never override existing models.
 */
getAllWithDiscovered(): Model<Api>[] {
  const known = new Set<string>();
  for (const model of this.models) {
    known.add(`${model.provider}/${model.id}`);
  }
  const additions = this.discoveredModels.filter(
    (candidate) => !known.has(`${candidate.provider}/${candidate.id}`),
  );
  return this.models.concat(additions);
}
/**
 * Check if a model was added via discovery (not built-in or custom).
 */
isDiscovered(model: Model<Api>): boolean {
  const sameModel = (candidate: Model<Api>) =>
    candidate.provider === model.provider && candidate.id === model.id;
  return this.discoveredModels.some(sameModel);
}
/**
 * Get the discovery cache instance.
 * Accessor for the underlying ModelDiscoveryCache so callers can inspect
 * or invalidate cached discovery results directly.
 */
getDiscoveryCache(): ModelDiscoveryCache {
  return this.discoveryCache;
}
/**
 * Convert DiscoveryResult[] into Model<Api>[] with default values.
 * Results carrying an error are skipped entirely.
 */
private convertDiscoveredModels(results: DiscoveryResult[]): Model<Api>[] {
  const successful = results.filter((result) => !result.error);
  return successful.flatMap((result) =>
    result.models.map(
      (dm) =>
        ({
          id: dm.id,
          name: dm.name ?? dm.id,
          api: "openai" as Api,
          provider: result.provider,
          baseUrl: "",
          reasoning: dm.reasoning ?? false,
          input: dm.input ?? ["text"],
          cost: dm.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
          contextWindow: dm.contextWindow ?? 128000,
          maxTokens: dm.maxTokens ?? 16384,
        }) as Model<Api>,
    ),
  );
}
}
/**

View file

@ -13,7 +13,7 @@ import type { ModelRegistry } from "./model-registry.js";
/** Default model IDs for each known provider */
export const defaultModelPerProvider: Record<KnownProvider, string> = {
"amazon-bedrock": "us.anthropic.claude-opus-4-6-v1",
anthropic: "claude-opus-4-6",
anthropic: "claude-opus-4-6[1m]",
openai: "gpt-5.4",
"azure-openai-responses": "gpt-5.2",
"openai-codex": "gpt-5.4",
@ -23,7 +23,7 @@ export const defaultModelPerProvider: Record<KnownProvider, string> = {
"google-vertex": "gemini-3-pro-preview",
"github-copilot": "gpt-4o",
openrouter: "openai/gpt-5.1-codex",
"vercel-ai-gateway": "anthropic/claude-opus-4-6",
"vercel-ai-gateway": "anthropic/claude-opus-4-6[1m]",
xai: "grok-4-fast-non-reasoning",
groq: "openai/gpt-oss-120b",
cerebras: "zai-glm-4.6",

View file

@ -0,0 +1,145 @@
import assert from "node:assert/strict";
import { existsSync, mkdirSync, readFileSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, it } from "node:test";
import { ModelsJsonWriter } from "./models-json-writer.js";
// Per-test scratch directory plus the models.json path under test.
let testDir: string;
let modelsJsonPath: string;
beforeEach(() => {
  const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
  testDir = join(tmpdir(), `models-json-writer-test-${suffix}`);
  mkdirSync(testDir, { recursive: true });
  modelsJsonPath = join(testDir, "models.json");
});
afterEach(() => {
  try {
    rmSync(testDir, { recursive: true, force: true });
  } catch {
    // Best-effort cleanup; leftover temp dirs are harmless.
  }
});
// Parse the models.json written by the code under test.
function readModels(): Record<string, unknown> {
  const raw = readFileSync(modelsJsonPath, "utf-8");
  return JSON.parse(raw);
}
// ─── addModel ────────────────────────────────────────────────────────────────
describe("ModelsJsonWriter — addModel", () => {
  const openaiProvider = { baseUrl: "https://api.openai.com", apiKey: "env:OPENAI_API_KEY", api: "openai" };
  it("creates file and adds model to new provider", () => {
    const writer = new ModelsJsonWriter(modelsJsonPath);
    writer.addModel("openai", { id: "gpt-4o", name: "GPT-4o" }, openaiProvider);
    const config = readModels() as any;
    assert.ok(config.providers.openai);
    assert.equal(config.providers.openai.models.length, 1);
    assert.equal(config.providers.openai.models[0].id, "gpt-4o");
  });
  it("appends model to existing provider", () => {
    const writer = new ModelsJsonWriter(modelsJsonPath);
    writer.addModel("openai", { id: "gpt-4o" }, openaiProvider);
    writer.addModel("openai", { id: "gpt-4o-mini" });
    const config = readModels() as any;
    assert.equal(config.providers.openai.models.length, 2);
  });
  it("replaces model with same id", () => {
    const writer = new ModelsJsonWriter(modelsJsonPath);
    writer.addModel("openai", { id: "gpt-4o", name: "Old" }, openaiProvider);
    writer.addModel("openai", { id: "gpt-4o", name: "New" });
    const config = readModels() as any;
    assert.equal(config.providers.openai.models.length, 1);
    assert.equal(config.providers.openai.models[0].name, "New");
  });
});
// ─── removeModel ─────────────────────────────────────────────────────────────
describe("ModelsJsonWriter — removeModel", () => {
  it("removes a model from provider", () => {
    const writer = new ModelsJsonWriter(modelsJsonPath);
    writer.addModel("openai", { id: "gpt-4o" }, { baseUrl: "https://api.openai.com", apiKey: "env:OPENAI_API_KEY", api: "openai" });
    writer.addModel("openai", { id: "gpt-4o-mini" });
    writer.removeModel("openai", "gpt-4o");
    const config = readModels() as any;
    const remaining = config.providers.openai.models;
    assert.equal(remaining.length, 1);
    assert.equal(remaining[0].id, "gpt-4o-mini");
  });
  it("removes provider when last model is removed", () => {
    const writer = new ModelsJsonWriter(modelsJsonPath);
    writer.addModel("openai", { id: "gpt-4o" }, { baseUrl: "https://api.openai.com", apiKey: "env:OPENAI_API_KEY", api: "openai" });
    writer.removeModel("openai", "gpt-4o");
    const config = readModels() as any;
    assert.equal(config.providers.openai, undefined);
  });
  it("handles removing from nonexistent provider", () => {
    // Should not throw
    new ModelsJsonWriter(modelsJsonPath).removeModel("nonexistent", "model-id");
  });
});
// ─── setProvider / removeProvider ────────────────────────────────────────────
describe("ModelsJsonWriter — provider operations", () => {
  it("sets a provider configuration", () => {
    const writer = new ModelsJsonWriter(modelsJsonPath);
    const custom = {
      baseUrl: "http://localhost:8080",
      apiKey: "test-key",
      api: "openai",
      models: [{ id: "local-model" }],
    };
    writer.setProvider("custom", custom);
    const config = readModels() as any;
    assert.ok(config.providers.custom);
    assert.equal(config.providers.custom.baseUrl, "http://localhost:8080");
  });
  it("removes a provider", () => {
    const writer = new ModelsJsonWriter(modelsJsonPath);
    writer.setProvider("custom", { baseUrl: "http://localhost:8080" });
    writer.removeProvider("custom");
    const config = readModels() as any;
    assert.equal(config.providers.custom, undefined);
  });
  it("handles removing nonexistent provider", () => {
    // Should not throw
    new ModelsJsonWriter(modelsJsonPath).removeProvider("nonexistent");
  });
});
// ─── listProviders ───────────────────────────────────────────────────────────
describe("ModelsJsonWriter — listProviders", () => {
  it("returns empty config when file does not exist", () => {
    const missingPath = join(testDir, "nonexistent.json");
    const config = new ModelsJsonWriter(missingPath).listProviders();
    assert.deepEqual(config, { providers: {} });
  });
  it("returns current provider config", () => {
    const writer = new ModelsJsonWriter(modelsJsonPath);
    writer.setProvider("openai", { baseUrl: "https://api.openai.com" });
    writer.setProvider("ollama", { baseUrl: "http://localhost:11434" });
    const config = writer.listProviders();
    assert.ok(config.providers.openai);
    assert.ok(config.providers.ollama);
  });
});

View file

@ -0,0 +1,188 @@
/**
* Safe read-modify-write for models.json with file locking.
* Prevents concurrent writes from corrupting the config file.
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
import { dirname, join } from "path";
import lockfile from "proper-lockfile";
import { getAgentDir } from "../config.js";
/** Shape of a single model entry inside models.json. */
interface ModelDefinition {
  id: string;
  name?: string;
  api?: string;
  baseUrl?: string;
  reasoning?: boolean;
  input?: ("text" | "image")[];
  cost?: { input: number; output: number; cacheRead: number; cacheWrite: number };
  contextWindow?: number;
  maxTokens?: number;
}
/** Per-provider configuration block in models.json. */
interface ProviderConfig {
  baseUrl?: string;
  apiKey?: string; // literal key or a resolvable reference such as "env:OPENAI_API_KEY"
  api?: string;
  headers?: Record<string, string>;
  authHeader?: boolean;
  models?: ModelDefinition[];
  modelOverrides?: Record<string, Record<string, unknown>>; // presumably keyed by model id — TODO confirm against readers
}
/** Top-level structure of models.json. */
interface ModelsConfig {
  providers: Record<string, ProviderConfig>;
}
/**
 * Safe read-modify-write access to models.json. Every mutation runs under a
 * proper-lockfile file lock so concurrent processes cannot corrupt the config.
 */
export class ModelsJsonWriter {
  private modelsJsonPath: string;

  constructor(modelsJsonPath?: string) {
    this.modelsJsonPath = modelsJsonPath ?? join(getAgentDir(), "models.json");
  }

  /**
   * Add a model to a provider. Creates the provider if it doesn't exist.
   * A model with the same id replaces the existing entry instead of duplicating it.
   */
  addModel(provider: string, model: ModelDefinition, providerConfig?: Partial<ProviderConfig>): void {
    this.withLock((config) => {
      if (!config.providers[provider]) {
        config.providers[provider] = {
          ...providerConfig,
          models: [],
        };
      }
      const providerEntry = config.providers[provider];
      if (!providerEntry.models) {
        providerEntry.models = [];
      }
      // Replace existing model with same id, or append
      const existingIndex = providerEntry.models.findIndex((m) => m.id === model.id);
      if (existingIndex >= 0) {
        providerEntry.models[existingIndex] = model;
      } else {
        providerEntry.models.push(model);
      }
      return config;
    });
  }

  /**
   * Remove a model from a provider. Removes the provider if no models remain
   * (unless it still carries modelOverrides). No-op for unknown providers/models.
   */
  removeModel(provider: string, modelId: string): void {
    this.withLock((config) => {
      const providerEntry = config.providers[provider];
      if (!providerEntry?.models) return config;
      providerEntry.models = providerEntry.models.filter((m) => m.id !== modelId);
      // Clean up empty provider (no models and no overrides)
      if (providerEntry.models.length === 0 && !providerEntry.modelOverrides) {
        delete config.providers[provider];
      }
      return config;
    });
  }

  /**
   * Set or update an entire provider configuration.
   */
  setProvider(provider: string, providerConfig: ProviderConfig): void {
    this.withLock((config) => {
      config.providers[provider] = providerConfig;
      return config;
    });
  }

  /**
   * Remove a provider and all its models. No-op when the provider is absent.
   */
  removeProvider(provider: string): void {
    this.withLock((config) => {
      delete config.providers[provider];
      return config;
    });
  }

  /**
   * List all providers and their configurations (read-only snapshot, no lock taken).
   */
  listProviders(): ModelsConfig {
    return this.readConfig();
  }

  /** Read models.json, treating a missing or unparseable file as an empty config. */
  private readConfig(): ModelsConfig {
    if (!existsSync(this.modelsJsonPath)) {
      return { providers: {} };
    }
    try {
      const content = readFileSync(this.modelsJsonPath, "utf-8");
      return JSON.parse(content) as ModelsConfig;
    } catch {
      return { providers: {} };
    }
  }

  /** Serialize the config to disk, creating the parent directory if needed. */
  private writeConfig(config: ModelsConfig): void {
    const dir = dirname(this.modelsJsonPath);
    if (!existsSync(dir)) {
      mkdirSync(dir, { recursive: true });
    }
    writeFileSync(this.modelsJsonPath, JSON.stringify(config, null, 2), "utf-8");
  }

  /**
   * Acquire the lock on models.json, retrying briefly when another process
   * holds it. The file (and its directory) is created first because
   * proper-lockfile locks an existing path.
   *
   * @returns a release function that must be called to drop the lock
   * @throws the lockfile error — immediately for non-contention failures,
   *   or after exhausting retries on ELOCKED
   */
  private acquireLockWithRetry(): () => void {
    const maxAttempts = 10;
    const delayMs = 20;
    // Ensure file exists for locking
    const dir = dirname(this.modelsJsonPath);
    if (!existsSync(dir)) {
      mkdirSync(dir, { recursive: true });
    }
    if (!existsSync(this.modelsJsonPath)) {
      writeFileSync(this.modelsJsonPath, JSON.stringify({ providers: {} }, null, 2), "utf-8");
    }
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        return lockfile.lockSync(this.modelsJsonPath, { realpath: false });
      } catch (error) {
        const code =
          typeof error === "object" && error !== null && "code" in error
            ? String((error as { code?: unknown }).code)
            : undefined;
        // Only contention (ELOCKED) is retried; the final attempt rethrows it,
        // so no separate "last error" bookkeeping is needed.
        if (code !== "ELOCKED" || attempt === maxAttempts) {
          throw error;
        }
        // Synchronous busy-wait between retries (same pattern as auth-storage.ts);
        // acceptable because the critical section is a short read-modify-write.
        const start = Date.now();
        while (Date.now() - start < delayMs) {
          // spin
        }
      }
    }
    // Unreachable: the loop above always returns or throws. Kept as a
    // defensive backstop and for noImplicitReturns.
    throw new Error("Failed to acquire models.json lock");
  }

  /** Run a read-modify-write transaction on models.json under the file lock. */
  private withLock(fn: (config: ModelsConfig) => ModelsConfig): void {
    let release: (() => void) | undefined;
    try {
      release = this.acquireLockWithRetry();
      const config = this.readConfig();
      this.writeConfig(fn(config));
    } finally {
      release?.();
    }
  }
}

View file

@ -79,6 +79,13 @@ export interface FallbackSettings {
chains?: Record<string, FallbackChainEntry[]>; // keyed by chain name
}
/** User settings controlling runtime model discovery. */
export interface ModelDiscoverySettings {
  enabled?: boolean; // default: false
  providers?: string[]; // limit discovery to specific providers
  ttlMinutes?: number; // override default TTLs (in minutes)
  autoRefreshOnModelSelect?: boolean; // default: false - refresh discovery when opening model selector
}
export type TransportSetting = Transport;
/**
@ -134,6 +141,7 @@ export interface Settings {
bashInterceptor?: BashInterceptorSettings;
taskIsolation?: TaskIsolationSettings;
fallback?: FallbackSettings;
modelDiscovery?: ModelDiscoverySettings;
}
/** Deep merge settings: project/overrides take precedence, nested objects merge recursively */
@ -1076,4 +1084,17 @@ export class SettingsManager {
chains: this.getFallbackChains(),
};
}
/** Current model-discovery settings; an empty object when nothing is configured. */
getModelDiscoverySettings(): ModelDiscoverySettings {
  return this.settings.modelDiscovery ?? {};
}
setModelDiscoveryEnabled(enabled: boolean): void {
if (!this.globalSettings.modelDiscovery) {
this.globalSettings.modelDiscovery = {};
}
this.globalSettings.modelDiscovery.enabled = enabled;
this.markModified("modelDiscovery", "enabled");
this.save();
}
}

View file

@ -28,6 +28,7 @@ export const BUILTIN_SLASH_COMMANDS: ReadonlyArray<BuiltinSlashCommand> = [
{ name: "hotkeys", description: "Show all keyboard shortcuts" },
{ name: "fork", description: "Create a new fork from a previous message" },
{ name: "tree", description: "Navigate session tree (switch branches)" },
{ name: "provider", description: "Manage provider configuration" },
{ name: "login", description: "Login with OAuth provider" },
{ name: "logout", description: "Logout from OAuth provider" },
{ name: "new", description: "Start a new session" },

View file

@ -0,0 +1,85 @@
import assert from "node:assert/strict";
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { describe, it } from "node:test";
import {
computeEditDiff,
fuzzyFindText,
generateDiffString,
normalizeForFuzzyMatch,
} from "./edit-diff.js";
describe("edit-diff", () => {
  // normalizeForFuzzyMatch: smart quotes -> ASCII quotes, NBSP -> space,
  // em dash -> hyphen, trailing whitespace stripped per line.
  it("normalizes quotes, dashes, spaces, and trailing whitespace", () => {
    const input = "“hello”\u00A0world — test \nnext\t\t\n";
    assert.equal(normalizeForFuzzyMatch(input), "\"hello\" world - test\nnext\n");
  });
  // The fuzzy path should kick in (and report itself) when only unicode
  // punctuation differs between haystack and needle.
  it("falls back to fuzzy matching when unicode punctuation differs", () => {
    const result = fuzzyFindText("const title = “Hello”;\n", "const title = \"Hello\";\n");
    assert.equal(result.found, true);
    assert.equal(result.usedFuzzyMatch, true);
    assert.equal(result.contentForReplacement, "const title = \"Hello\";\n");
  });
  // Diff output is line-numbered; firstChangedLine points at the first hunk.
  it("renders numbered diffs with the first changed line", () => {
    const result = generateDiffString("line 1\nline 2\nline 3\n", "line 1\nline two\nline 3\n");
    assert.equal(result.firstChangedLine, 2);
    assert.match(result.diff, /-2 line 2/);
    assert.match(result.diff, /\+2 line two/);
  });
  // With a small context window, far-apart hunks must be separated by "..."
  // and unrelated middle lines must be omitted.
  it("respects contextLines and inserts separators for distant changes", () => {
    const lines = Array.from({ length: 20 }, (_, i) => `line ${i + 1}`);
    const oldContent = lines.join("\n") + "\n";
    const modified = [...lines];
    modified[1] = "changed 2"; // line 2
    modified[17] = "changed 18"; // line 18
    const newContent = modified.join("\n") + "\n";
    const result = generateDiffString(oldContent, newContent, 2);
    // Should contain separator between the two distant change regions
    assert.match(result.diff, /\.\.\./);
    // Should NOT contain lines far from changes (e.g. line 10)
    assert.doesNotMatch(result.diff, /line 10/);
    // Should contain the changed lines
    assert.match(result.diff, /changed 2/);
    assert.match(result.diff, /changed 18/);
  });
  // Inputs above the DP-size threshold must take the linear fallback path
  // instead of the quadratic algorithm, and still locate the change.
  it("handles large files without OOM by falling back to linear diff", () => {
    // Create files large enough to exceed the DP threshold
    const lineCount = 3000;
    const oldLines = Array.from({ length: lineCount }, (_, i) => `line ${i}`);
    const newLines = [...oldLines];
    newLines[1500] = "CHANGED";
    const result = generateDiffString(oldLines.join("\n") + "\n", newLines.join("\n") + "\n");
    assert.ok(result.firstChangedLine !== undefined);
    assert.match(result.diff, /CHANGED/);
  });
  // End-to-end preview: read a real file from disk and diff against a
  // replacement, exercising the pure-JS (non-native) code path.
  it("computes diffs for preview without native helpers", async () => {
    const dir = mkdtempSync(join(tmpdir(), "edit-diff-test-"));
    try {
      const file = join(dir, "sample.ts");
      writeFileSync(file, "const title = “Hello”;\n", "utf-8");
      const result = await computeEditDiff(
        file,
        "const title = \"Hello\";\n",
        "const title = \"Hi\";\n",
        dir,
      );
      assert.ok(!("error" in result), "expected a diff result");
      if (!("error" in result)) {
        assert.equal(result.firstChangedLine, 1);
        assert.match(result.diff, /\+1 const title = "Hi";/);
      }
    } finally {
      rmSync(dir, { recursive: true, force: true });
    }
  });
});

View file

@ -2,15 +2,11 @@
* Shared diff computation utilities for the edit tool.
* Used by both edit.ts (for execution) and tool-execution.ts (for preview rendering).
*
* Hot-path functions (fuzzyFindText, normalizeForFuzzyMatch, generateDiffString)
* delegate to the native Rust engine for performance on large files.
* These helpers intentionally stay in JavaScript. Issue #453 showed that
* post-tool preview paths must not depend on the native addon because a native
* hang there can wedge the entire interactive session after a successful tool run.
*/
import {
fuzzyFindText as nativeFuzzyFindText,
generateDiff as nativeGenerateDiff,
normalizeForFuzzyMatch as nativeNormalizeForFuzzyMatch,
} from "@gsd/native";
import { constants } from "fs";
import { access, readFile } from "fs/promises";
import { resolveToCwd } from "./path-utils.js";
@ -32,14 +28,23 @@ export function restoreLineEndings(text: string, ending: "\r\n" | "\n"): string
}
/**
 * Normalize text for fuzzy matching.
 * - Normalize CRLF/CR line endings to LF
 * - Normalize smart quotes to ASCII equivalents
 * - Normalize Unicode dashes/hyphens to ASCII hyphen
 * - Normalize special Unicode spaces to regular space
 * - Strip trailing whitespace from each line
 *
 * Kept in plain JavaScript deliberately: preview paths must not depend on
 * the native addon (see file header).
 */
export function normalizeForFuzzyMatch(text: string): string {
  return text
    .replace(/\r\n/g, "\n")
    .replace(/\r/g, "\n")
    // Smart double quotes (U+201C/U+201D) -> ASCII double quote.
    .replace(/[\u201C\u201D]/g, '"')
    // Smart single quotes (U+2018/U+2019) -> ASCII apostrophe. Written as
    // escapes so the class cannot be mangled by encoding round-trips.
    .replace(/[\u2018\u2019]/g, "'")
    // Unicode hyphen/dash variants (U+2010..U+2014, U+2212) -> "-".
    .replace(/[\u2010\u2011\u2012\u2013\u2014\u2212]/g, "-")
    // NBSP and other Unicode space separators -> regular space.
    .replace(/[\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]/g, " ")
    .split("\n")
    .map((line) => line.replace(/[ \t]+$/g, ""))
    .join("\n");
}
export interface FuzzyMatchResult {
@ -59,14 +64,44 @@ export interface FuzzyMatchResult {
}
/**
 * Find oldText in content, trying exact match first, then fuzzy match.
 *
 * When fuzzy matching is used, the returned contentForReplacement is the
 * fuzzy-normalized version of the content, so the returned index is valid
 * against that normalized string rather than the raw content.
 */
export function fuzzyFindText(content: string, oldText: string): FuzzyMatchResult {
  // Fast path: exact byte-for-byte match against the raw content.
  const exactIndex = content.indexOf(oldText);
  if (exactIndex !== -1) {
    return {
      found: true,
      index: exactIndex,
      matchLength: oldText.length,
      usedFuzzyMatch: false,
      contentForReplacement: content,
    };
  }
  // Fuzzy path: normalize Unicode punctuation/whitespace on both sides,
  // then retry the search in normalized space.
  const normalizedContent = normalizeForFuzzyMatch(content);
  const normalizedOldText = normalizeForFuzzyMatch(oldText);
  const fuzzyIndex = normalizedContent.indexOf(normalizedOldText);
  if (fuzzyIndex === -1) {
    // No match either way; hand back the raw content unchanged.
    return {
      found: false,
      index: -1,
      matchLength: 0,
      usedFuzzyMatch: false,
      contentForReplacement: content,
    };
  }
  return {
    found: true,
    index: fuzzyIndex,
    matchLength: normalizedOldText.length,
    usedFuzzyMatch: true,
    contentForReplacement: normalizedContent,
  };
}
/** Strip UTF-8 BOM if present, return both the BOM (if any) and the text without it */
@ -75,20 +110,81 @@ export function stripBom(content: string): { bom: string; text: string } {
}
/**
 * Generate a unified diff string with line numbers and context.
 *
 * Returns both the diff string and the first changed line number (in the
 * new file, or undefined when the contents are identical). Only lines
 * within `contextLines` of a change are included (like unified diff);
 * distant change regions are separated by a "..." line.
 */
export function generateDiffString(
  oldContent: string,
  newContent: string,
  contextLines = 4,
): { diff: string; firstChangedLine: number | undefined } {
  const ops = buildLineDiff(oldContent, newContent);
  let firstChangedLine: number | undefined;
  // First pass: assign old/new line numbers to every op and record which
  // op indices represent changes.
  const annotated: { op: LineDiffOp; oldLine: number; newLine: number }[] = [];
  let oldLine = 1;
  let newLine = 1;
  const changedIndices: number[] = [];
  for (let idx = 0; idx < ops.length; idx++) {
    const op = ops[idx];
    annotated.push({ op, oldLine, newLine });
    if (op.type !== "context") {
      changedIndices.push(idx);
      if (firstChangedLine === undefined) {
        firstChangedLine = newLine;
      }
    }
    // Removes advance only the old counter, adds only the new one.
    if (op.type === "remove") {
      oldLine += 1;
    } else if (op.type === "add") {
      newLine += 1;
    } else {
      oldLine += 1;
      newLine += 1;
    }
  }
  // Build set of indices to include (changes + surrounding context).
  const includeSet = new Set<number>();
  for (const ci of changedIndices) {
    for (let k = Math.max(0, ci - contextLines); k <= Math.min(ops.length - 1, ci + contextLines); k++) {
      includeSet.add(k);
    }
  }
  // Right-align line numbers to the widest line number in either file.
  const maxLine = Math.max(oldLine - 1, newLine - 1, 1);
  const lineNumberWidth = String(maxLine).length;
  const rendered: string[] = [];
  let lastIncluded = -1;
  for (let idx = 0; idx < annotated.length; idx++) {
    if (!includeSet.has(idx)) continue;
    // Insert separator when there's a gap between included regions.
    if (lastIncluded !== -1 && idx > lastIncluded + 1) {
      rendered.push("...");
    }
    lastIncluded = idx;
    const { op, oldLine: ol, newLine: nl } = annotated[idx];
    if (op.type === "context") {
      rendered.push(` ${String(nl).padStart(lineNumberWidth, " ")} ${op.line}`);
    } else if (op.type === "remove") {
      rendered.push(`-${String(ol).padStart(lineNumberWidth, " ")} ${op.line}`);
    } else {
      rendered.push(`+${String(nl).padStart(lineNumberWidth, " ")} ${op.line}`);
    }
  }
  return {
    diff: rendered.join("\n"),
    firstChangedLine,
  };
}
@ -101,6 +197,138 @@ export interface EditDiffError {
error: string;
}
type LineDiffOp =
| { type: "context"; line: string }
| { type: "remove"; line: string }
| { type: "add"; line: string };
/**
 * Split text into lines, dropping the empty trailing entry produced by a
 * terminating newline so "a\nb\n" and "a\nb" both yield ["a", "b"].
 */
function splitLines(text: string): string[] {
  const parts = text.split("\n");
  if (parts.length > 0 && parts.at(-1) === "") {
    parts.pop();
  }
  return parts;
}
/**
* Maximum number of cells (oldLines * newLines) before we switch from the
* full LCS DP algorithm to a simpler linear-scan diff. This prevents OOM
* on large files (e.g. 10k lines would need a 100M-cell matrix).
*/
const MAX_DP_CELLS = 4_000_000; // ~32 MB for 64-bit numbers
/**
 * Produce a line-level diff between two texts.
 *
 * Uses the optimal LCS algorithm whenever the DP table fits within
 * MAX_DP_CELLS; otherwise falls back to the O(n+m) linear diff to avoid
 * allocating a huge matrix for large files.
 */
function buildLineDiff(oldContent: string, newContent: string): LineDiffOp[] {
  const before = splitLines(oldContent);
  const after = splitLines(newContent);
  const tableCells = (before.length + 1) * (after.length + 1);
  return tableCells > MAX_DP_CELLS
    ? buildLineDiffLinear(before, after)
    : buildLineDiffLCS(before, after);
}
/**
* Full LCS-based diff using O(n*m) DP table. Produces optimal diffs but
* is only safe for files where n*m <= MAX_DP_CELLS.
*/
/**
 * Optimal line diff via longest-common-subsequence dynamic programming.
 * O(n*m) time and space, so callers must gate on MAX_DP_CELLS first.
 */
function buildLineDiffLCS(oldLines: string[], newLines: string[]): LineDiffOp[] {
  const rows = oldLines.length;
  const cols = newLines.length;
  // lcs[r][c] = length of the LCS of oldLines[r..] and newLines[c..].
  const lcs: number[][] = Array.from({ length: rows + 1 }, () =>
    Array<number>(cols + 1).fill(0),
  );
  for (let r = rows - 1; r >= 0; r--) {
    for (let c = cols - 1; c >= 0; c--) {
      lcs[r][c] =
        oldLines[r] === newLines[c]
          ? lcs[r + 1][c + 1] + 1
          : Math.max(lcs[r + 1][c], lcs[r][c + 1]);
    }
  }
  // Walk the table forward, emitting context/remove/add ops.
  const ops: LineDiffOp[] = [];
  let r = 0;
  let c = 0;
  while (r < rows && c < cols) {
    if (oldLines[r] === newLines[c]) {
      ops.push({ type: "context", line: oldLines[r] });
      r += 1;
      c += 1;
    } else if (lcs[r + 1][c] >= lcs[r][c + 1]) {
      // Skipping the old line keeps an LCS at least as long: emit a remove.
      ops.push({ type: "remove", line: oldLines[r] });
      r += 1;
    } else {
      ops.push({ type: "add", line: newLines[c] });
      c += 1;
    }
  }
  // Flush whichever side still has unmatched trailing lines.
  while (r < rows) {
    ops.push({ type: "remove", line: oldLines[r] });
    r += 1;
  }
  while (c < cols) {
    ops.push({ type: "add", line: newLines[c] });
    c += 1;
  }
  return ops;
}
/**
* Linear-time fallback diff for large files. Matches common prefix/suffix,
* then treats the remaining middle as a bulk remove+add. Not optimal but
* O(n+m) in both time and space.
*/
/**
 * O(n+m) fallback diff for large files: keep the common prefix and suffix
 * as context and treat everything between as a bulk remove+add. Not minimal,
 * but safe on inputs too large for the LCS table.
 */
function buildLineDiffLinear(oldLines: string[], newLines: string[]): LineDiffOp[] {
  const shorter = Math.min(oldLines.length, newLines.length);
  // Length of the shared leading run.
  let head = 0;
  while (head < shorter && oldLines[head] === newLines[head]) {
    head += 1;
  }
  // Length of the shared trailing run; capped so it never overlaps the prefix.
  let tail = 0;
  while (
    tail < shorter - head &&
    oldLines[oldLines.length - 1 - tail] === newLines[newLines.length - 1 - tail]
  ) {
    tail += 1;
  }
  const ops: LineDiffOp[] = [];
  for (const line of oldLines.slice(0, head)) {
    ops.push({ type: "context", line });
  }
  for (const line of oldLines.slice(head, oldLines.length - tail)) {
    ops.push({ type: "remove", line });
  }
  for (const line of newLines.slice(head, newLines.length - tail)) {
    ops.push({ type: "add", line });
  }
  for (const line of oldLines.slice(oldLines.length - tail)) {
    ops.push({ type: "context", line });
  }
  return ops;
}
/**
* Compute the diff for an edit operation without applying it.
* Used for preview rendering in the TUI before the tool executes.

View file

@ -143,7 +143,11 @@ export {
// Footer data provider (git branch + extension statuses - data not otherwise available to extensions)
export type { ReadonlyFooterDataProvider } from "./core/footer-data-provider.js";
export { convertToLlm } from "./core/messages.js";
export { ModelDiscoveryCache } from "./core/discovery-cache.js";
export type { DiscoveredModel, DiscoveryResult, ProviderDiscoveryAdapter } from "./core/model-discovery.js";
export { getDiscoverableProviders, getDiscoveryAdapter } from "./core/model-discovery.js";
export { ModelRegistry } from "./core/model-registry.js";
export { ModelsJsonWriter } from "./core/models-json-writer.js";
export type {
PackageManager,
PathMetadata,
@ -307,6 +311,7 @@ export {
LoginDialogComponent,
ModelSelectorComponent,
OAuthSelectorComponent,
ProviderManagerComponent,
type RenderDiffOptions,
rawKeyHint,
renderDiff,

View file

@ -11,7 +11,7 @@ import { createInterface } from "readline";
import { type Args, parseArgs, printHelp } from "./cli/args.js";
import { selectConfig } from "./cli/config-selector.js";
import { processFileArguments } from "./cli/file-processor.js";
import { listModels } from "./cli/list-models.js";
import { discoverAndPrintModels, listModels } from "./cli/list-models.js";
import { selectSession } from "./cli/session-picker.js";
import { APP_NAME, getAgentDir, getModelsPath, VERSION } from "./config.js";
import { AuthStorage } from "./core/auth-storage.js";
@ -660,9 +660,26 @@ export async function main(args: string[]) {
process.exit(0);
}
if (parsed.addProvider) {
const { ModelsJsonWriter } = await import("./core/models-json-writer.js");
const writer = new ModelsJsonWriter();
writer.setProvider(parsed.addProvider, {
baseUrl: parsed.addProviderBaseUrl,
apiKey: parsed.apiKey,
});
console.log(`Provider "${parsed.addProvider}" added to models.json`);
process.exit(0);
}
if (parsed.discoverModels !== undefined) {
const provider = typeof parsed.discoverModels === "string" ? parsed.discoverModels : undefined;
await discoverAndPrintModels(modelRegistry, provider);
process.exit(0);
}
if (parsed.listModels !== undefined) {
const searchPattern = typeof parsed.listModels === "string" ? parsed.listModels : undefined;
await listModels(modelRegistry, searchPattern);
await listModels(modelRegistry, { searchPattern, discover: parsed.discover });
process.exit(0);
}

View file

@ -1,4 +1,4 @@
import { Editor, type EditorOptions, type EditorTheme, type TUI } from "@gsd/pi-tui";
import { Editor, type EditorOptions, type EditorTheme, type TUI, isKittyProtocolActive } from "@gsd/pi-tui";
import type { AppAction, KeybindingsManager } from "../../../core/keybindings.js";
/**
@ -69,6 +69,13 @@ export class CustomEditor extends Editor {
// Check all other app actions
for (const [action, handler] of this.actionHandlers) {
if (action !== "interrupt" && action !== "exit" && this.keybindings.matches(data, action)) {
// When kitty protocol is not active, \x1b\r is ambiguous:
// it could be alt+enter (followUp) or shift+enter mapped via /terminal-setup.
// Prioritize newLine since that's what terminal-setup configures.
// Alt+enter followUp still works in kitty-protocol terminals.
if (action === "followUp" && !isKittyProtocolActive() && data === "\x1b\r") {
break; // Fall through to parent editor's newLine handling
}
handler();
return;
}

View file

@ -18,6 +18,7 @@ export { appKey, appKeyHint, editorKey, keyHint, rawKeyHint } from "./keybinding
export { LoginDialogComponent } from "./login-dialog.js";
export { ModelSelectorComponent } from "./model-selector.js";
export { OAuthSelectorComponent } from "./oauth-selector.js";
export { ProviderManagerComponent } from "./provider-manager.js";
export { type ModelsCallbacks, type ModelsConfig, ScopedModelsSelectorComponent } from "./scoped-models-selector.js";
export { SessionSelectorComponent } from "./session-selector.js";
export { type SettingsCallbacks, type SettingsConfig, SettingsSelectorComponent } from "./settings-selector.js";

View file

@ -160,7 +160,7 @@ export class ModelSelectorComponent extends Container implements Focusable {
// Load available models (built-in models still work even if models.json failed)
try {
const availableModels = await this.modelRegistry.getAvailable();
const availableModels = this.modelRegistry.getAvailable();
models = availableModels.map((model: Model<any>) => ({
provider: model.provider,
id: model.id,

View file

@ -0,0 +1,163 @@
/**
* TUI component for managing provider configurations.
* Shows providers with auth status, discovery support, and model counts.
*/
import {
Container,
type Focusable,
getEditorKeybindings,
Spacer,
Text,
type TUI,
} from "@gsd/pi-tui";
import type { AuthStorage } from "../../../core/auth-storage.js";
import { getDiscoverableProviders } from "../../../core/model-discovery.js";
import type { ModelRegistry } from "../../../core/model-registry.js";
import { theme } from "../theme/theme.js";
import { rawKeyHint } from "./keybinding-hints.js";
/** Per-provider display row assembled by loadProviders(). */
interface ProviderInfo {
  name: string;
  // True when AuthStorage holds credentials for this provider.
  hasAuth: boolean;
  // True when a discovery adapter is registered for this provider.
  supportsDiscovery: boolean;
  // Number of models currently registered under this provider.
  modelCount: number;
}

/**
 * Overlay component listing every known provider with its auth status,
 * discovery support, and registered model count.
 *
 * Key handling: up/down moves the selection (wrapping at both ends),
 * "d" triggers model discovery for the selected provider (when supported),
 * "r" removes stored auth, and cancel closes the manager via onDone.
 */
export class ProviderManagerComponent extends Container implements Focusable {
  private _focused = false;
  // Focusable contract: the TUI toggles this when focus changes.
  get focused(): boolean {
    return this._focused;
  }
  set focused(value: boolean) {
    this._focused = value;
  }
  private providers: ProviderInfo[] = [];
  private selectedIndex = 0;
  private listContainer: Container;
  private tui: TUI;
  private authStorage: AuthStorage;
  private modelRegistry: ModelRegistry;
  // Invoked when the user dismisses the manager.
  private onDone: () => void;
  // Invoked with the provider name when the user requests discovery.
  private onDiscover: (provider: string) => void;

  /**
   * Build the header, key hints, and provider list.
   *
   * @param tui Owning TUI, used to request re-renders after state changes.
   * @param authStorage Credential store queried for per-provider auth.
   * @param modelRegistry Registry used to count models per provider.
   * @param onDone Called when the user closes the manager.
   * @param onDiscover Called to start model discovery for a provider.
   */
  constructor(
    tui: TUI,
    authStorage: AuthStorage,
    modelRegistry: ModelRegistry,
    onDone: () => void,
    onDiscover: (provider: string) => void,
  ) {
    super();
    this.tui = tui;
    this.authStorage = authStorage;
    this.modelRegistry = modelRegistry;
    this.onDone = onDone;
    this.onDiscover = onDiscover;
    // Header
    this.addChild(new Text(theme.fg("accent", "Provider Manager"), 0, 0));
    this.addChild(new Spacer(1));
    // Hints
    const hints = [
      rawKeyHint("d", "discover"),
      rawKeyHint("r", "remove auth"),
      rawKeyHint("esc", "close"),
    ].join(" ");
    this.addChild(new Text(hints, 0, 0));
    this.addChild(new Spacer(1));
    // List
    this.listContainer = new Container();
    this.addChild(this.listContainer);
    this.loadProviders();
    this.updateList();
  }

  /** Rebuild the providers array from the registry and discovery adapters. */
  private loadProviders(): void {
    const discoverableSet = new Set(getDiscoverableProviders());
    const allModels = this.modelRegistry.getAll();
    // Group models by provider
    const providerModelCounts = new Map<string, number>();
    for (const model of allModels) {
      providerModelCounts.set(model.provider, (providerModelCounts.get(model.provider) ?? 0) + 1);
    }
    // Build provider list from all known providers
    const providerNames = new Set([
      ...providerModelCounts.keys(),
      ...discoverableSet,
    ]);
    this.providers = Array.from(providerNames)
      .sort()
      .map((name) => ({
        name,
        hasAuth: this.authStorage.hasAuth(name),
        supportsDiscovery: discoverableSet.has(name),
        modelCount: providerModelCounts.get(name) ?? 0,
      }));
  }

  /** Re-render the list rows, marking the selected row with a "> " prefix. */
  private updateList(): void {
    this.listContainer.clear();
    for (let i = 0; i < this.providers.length; i++) {
      const p = this.providers[i];
      const isSelected = i === this.selectedIndex;
      const authBadge = p.hasAuth ? theme.fg("success", "[auth]") : theme.fg("muted", "[no auth]");
      const discoveryBadge = p.supportsDiscovery ? theme.fg("accent", "[discovery]") : "";
      const countBadge = theme.fg("muted", `(${p.modelCount} models)`);
      const prefix = isSelected ? theme.fg("accent", "> ") : " ";
      const nameText = isSelected ? theme.fg("accent", p.name) : p.name;
      const parts = [prefix, nameText, " ", authBadge];
      if (discoveryBadge) parts.push(" ", discoveryBadge);
      parts.push(" ", countBadge);
      this.listContainer.addChild(new Text(parts.join(""), 0, 0));
    }
    if (this.providers.length === 0) {
      this.listContainer.addChild(new Text(theme.fg("muted", " No providers configured"), 0, 0));
    }
  }

  /**
   * Handle a raw key event while focused.
   * Arrow keys wrap around the list; "d"/"r" act on the selected provider.
   */
  handleInput(keyData: string): void {
    const kb = getEditorKeybindings();
    if (kb.matches(keyData, "selectUp")) {
      if (this.providers.length === 0) return;
      // Wrap from the first row to the last.
      this.selectedIndex = this.selectedIndex === 0 ? this.providers.length - 1 : this.selectedIndex - 1;
      this.updateList();
      this.tui.requestRender();
    } else if (kb.matches(keyData, "selectDown")) {
      if (this.providers.length === 0) return;
      // Wrap from the last row back to the first.
      this.selectedIndex = this.selectedIndex === this.providers.length - 1 ? 0 : this.selectedIndex + 1;
      this.updateList();
      this.tui.requestRender();
    } else if (kb.matches(keyData, "selectCancel")) {
      this.onDone();
    } else if (keyData === "d" || keyData === "D") {
      const provider = this.providers[this.selectedIndex];
      // Discovery is only offered for providers with a registered adapter.
      if (provider?.supportsDiscovery) {
        this.onDiscover(provider.name);
      }
    } else if (keyData === "r" || keyData === "R") {
      const provider = this.providers[this.selectedIndex];
      if (provider?.hasAuth) {
        this.authStorage.remove(provider.name);
        // Refresh badges after removing credentials.
        this.loadProviders();
        this.updateList();
        this.tui.requestRender();
      }
    }
  }
}

View file

@ -83,6 +83,7 @@ import { appKey, appKeyHint, editorKey, formatKeyForDisplay, keyHint, rawKeyHint
import { LoginDialogComponent } from "./components/login-dialog.js";
import { ModelSelectorComponent } from "./components/model-selector.js";
import { OAuthSelectorComponent } from "./components/oauth-selector.js";
import { ProviderManagerComponent } from "./components/provider-manager.js";
import { ScopedModelsSelectorComponent } from "./components/scoped-models-selector.js";
import { SessionSelectorComponent } from "./components/session-selector.js";
import { SelectSubmenu, SettingsSelectorComponent, THINKING_DESCRIPTIONS } from "./components/settings-selector.js";
@ -1728,7 +1729,7 @@ export class InteractiveMode {
/**
* Show a notification for extensions.
*/
private showExtensionNotify(message: string, type?: "info" | "warning" | "error"): void {
private showExtensionNotify(message: string, type?: "info" | "warning" | "error" | "success"): void {
if (type === "error") {
this.showError(message);
} else if (type === "warning") {
@ -1997,6 +1998,11 @@ export class InteractiveMode {
this.editor.setText("");
return;
}
if (text === "/provider") {
this.showProviderManager();
this.editor.setText("");
return;
}
if (text === "/login") {
this.showOAuthSelector("login");
this.editor.setText("");
@ -3746,6 +3752,37 @@ export class InteractiveMode {
this.showStatus("Resumed session");
}
/**
 * Open the provider manager overlay (the `/provider` command).
 * Closing the overlay invokes `done()`; a discovery request runs
 * ModelRegistry.discoverModels for the chosen provider and reports the
 * outcome via the status/error lines before closing.
 */
private showProviderManager(): void {
  this.showSelector((done) => {
    const component = new ProviderManagerComponent(
      this.ui,
      this.session.modelRegistry.authStorage,
      this.session.modelRegistry,
      () => {
        done();
        this.ui.requestRender();
      },
      async (provider: string) => {
        this.showStatus(`Discovering models for ${provider}...`);
        try {
          const results = await this.session.modelRegistry.discoverModels([provider]);
          // discoverModels was called with a single provider, so only the
          // first result is relevant.
          const result = results[0];
          if (result?.error) {
            this.showError(`Discovery failed: ${result.error}`);
          } else {
            this.showStatus(`Discovered ${result?.models.length ?? 0} models from ${provider}`);
          }
        } catch (error) {
          this.showError(error instanceof Error ? error.message : String(error));
        }
        // Close the overlay whether discovery succeeded or failed.
        done();
        this.ui.requestRender();
      },
    );
    return { component, focus: component };
  });
}
private async showOAuthSelector(mode: "login" | "logout"): Promise<void> {
if (mode === "logout") {
const providers = this.session.modelRegistry.authStorage.list();

View file

@ -11,6 +11,11 @@ import {
} from "@gsd/native";
import { getCustomThemesDir, getThemesDir } from "../../../config.js";
// Issue #453: native preview highlighting can wedge the entire interactive
// session after a successful file tool. Keep the safer plain-text path as the
// default and allow native highlighting only as an explicit opt-in.
const NATIVE_TUI_HIGHLIGHT_ENABLED = process.env.GSD_ENABLE_NATIVE_TUI_HIGHLIGHT === "1";
// ============================================================================
// Types & Schema
// ============================================================================
@ -955,6 +960,10 @@ function getHighlightColors(t: Theme): HighlightColors {
* Returns array of highlighted lines.
*/
export function highlightCode(code: string, lang?: string): string[] {
if (!NATIVE_TUI_HIGHLIGHT_ENABLED) {
return code.split("\n");
}
const validLang = lang && supportsLanguage(lang) ? lang : null;
try {
return nativeHighlightCode(code, validLang, getHighlightColors(theme)).split("\n");
@ -1051,6 +1060,10 @@ export function getMarkdownTheme(): MarkdownTheme {
underline: (text: string) => theme.underline(text),
strikethrough: (text: string) => chalk.strikethrough(text),
highlightCode: (code: string, lang?: string): string[] => {
if (!NATIVE_TUI_HIGHLIGHT_ENABLED) {
return code.split("\n").map((line) => theme.fg("mdCodeBlock", line));
}
const validLang = lang && supportsLanguage(lang) ? lang : null;
try {
return nativeHighlightCode(code, validLang, getHighlightColors(theme)).split("\n");

View file

@ -133,7 +133,7 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
"cancelled" in r && r.cancelled ? undefined : "value" in r ? r.value : undefined,
),
notify(message: string, type?: "info" | "warning" | "error"): void {
notify(message: string, type?: "info" | "warning" | "error" | "success"): void {
// Fire and forget - no response needed
output({
type: "extension_ui_request",

View file

@ -0,0 +1,45 @@
// pi-tui CancellableLoader component regression tests
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
import { describe, it, mock, beforeEach, afterEach } from "node:test";
import assert from "node:assert/strict";
import { CancellableLoader } from "../cancellable-loader.js";
// Minimal TUI stub — only requestRender is exercised by the loader.
function makeMockTUI() {
  const stub = { requestRender: mock.fn() };
  return stub as any;
}
describe("CancellableLoader", () => {
  let loader: CancellableLoader;
  let tui: ReturnType<typeof makeMockTUI>;
  // Identity render/format callbacks — the loader's output is not inspected.
  const identity = (s: string) => s;
  const newLoader = () => new CancellableLoader(tui, identity, identity, "test");
  beforeEach(() => {
    tui = makeMockTUI();
  });
  afterEach(() => {
    loader?.dispose();
  });
  it("dispose() aborts the AbortController signal", () => {
    loader = newLoader();
    assert.equal(loader.aborted, false);
    loader.dispose();
    assert.equal(loader.aborted, true);
  });
  it("dispose() clears the onAbort callback", () => {
    loader = newLoader();
    loader.onAbort = () => {};
    loader.dispose();
    assert.equal(loader.onAbort, undefined);
  });
  it("signal is aborted after dispose()", () => {
    loader = newLoader();
    const { signal } = loader;
    assert.equal(signal.aborted, false);
    loader.dispose();
    assert.equal(signal.aborted, true);
  });
});

View file

@ -0,0 +1,35 @@
// pi-tui Input component regression tests
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import { Input } from "../input.js";
describe("Input", () => {
it("paste buffer is cleared when focus is lost", () => {
const input = new Input();
input.focused = true;
// Simulate starting a paste (bracket paste start marker)
input.handleInput("\x1b[200~partial");
// Now lose focus mid-paste
input.focused = false;
// Regain focus — should not have stale paste state
input.focused = true;
// Typing normal text should work without paste buffer corruption
input.handleInput("hello");
assert.equal(input.getValue(), "hello");
});
it("focused getter/setter works correctly", () => {
const input = new Input();
assert.equal(input.focused, false);
input.focused = true;
assert.equal(input.focused, true);
input.focused = false;
assert.equal(input.focused, false);
});
});

View file

@ -0,0 +1,45 @@
// pi-tui Loader component regression tests
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
import { describe, it, mock, beforeEach, afterEach } from "node:test";
import assert from "node:assert/strict";
import { Loader } from "../loader.js";
// Minimal TUI stub — only requestRender is exercised by Loader in these tests.
function makeMockTUI() {
  return { requestRender: mock.fn() } as any;
}
// Lifecycle tests for Loader: start/stop/dispose must be safe to call
// repeatedly and must not leak timers.
describe("Loader", () => {
  let loader: Loader;
  let tui: ReturnType<typeof makeMockTUI>;
  beforeEach(() => {
    tui = makeMockTUI();
  });
  // Always stop the spinner so no interval outlives a test.
  afterEach(() => {
    loader?.stop();
  });
  it("start() is idempotent — calling twice does not leak intervals", () => {
    loader = new Loader(tui, (s) => s, (s) => s, "test");
    // Constructor calls start() once, call it again
    loader.start();
    // stop() should clear the interval cleanly without orphaned timers
    loader.stop();
  });
  it("dispose() stops the interval and nulls the TUI reference", () => {
    loader = new Loader(tui, (s) => s, (s) => s, "test");
    loader.dispose();
    // After dispose, calling stop() again should be safe (no-op)
    loader.stop();
  });
  it("stop() is safe to call multiple times", () => {
    loader = new Loader(tui, (s) => s, (s) => s, "test");
    loader.stop();
    loader.stop();
    loader.stop();
  });
});

View file

@ -35,6 +35,8 @@ export class CancellableLoader extends Loader {
}
/**
 * Tear down the loader: abort the signal so in-flight work can cancel,
 * drop the onAbort callback so it cannot fire after disposal, and stop
 * the underlying spinner interval.
 */
dispose(): void {
  this.abortController.abort();
  this.onAbort = undefined;
  this.stop();
}
}

View file

@ -182,7 +182,7 @@ export class Editor implements Component, Focusable {
private undoStack = new UndoStack<EditorState>();
private textVersion = 0;
private cachedText: string | null = null;
private layoutCache: { width: number; textVersion: number; lines: LayoutLine[] } | null = null;
private layoutCache: { width: number; textVersion: number; cursorLine: number; cursorCol: number; lines: LayoutLine[] } | null = null;
private visualLineMapCache: { width: number; textVersion: number; lines: VisualLine[] } | null = null;
public onSubmit?: (text: string) => void;
@ -243,12 +243,14 @@ export class Editor implements Component, Focusable {
private getLayoutLines(width: number): LayoutLine[] {
const cached = this.layoutCache;
if (cached && cached.width === width && cached.textVersion === this.textVersion) {
if (cached && cached.width === width && cached.textVersion === this.textVersion
&& cached.cursorLine === this.state.cursorLine && cached.cursorCol === this.state.cursorCol) {
return cached.lines;
}
const lines = this.layoutText(width);
this.layoutCache = { width, textVersion: this.textVersion, lines };
this.layoutCache = { width, textVersion: this.textVersion, lines,
cursorLine: this.state.cursorLine, cursorCol: this.state.cursorCol };
return lines;
}
@ -730,8 +732,17 @@ export class Editor implements Component, Focusable {
return;
}
// Regular characters
// Regular characters — reject partial escape sequence remnants that can
// occur when event loop latency causes the StdinBuffer to split an escape
// sequence (e.g. \x1b flushed as ESC, then "[D" arrives as text).
if (data.charCodeAt(0) >= 32) {
if (data[0] === "[" && data.length >= 2 && data.length <= 8) {
const last = data[data.length - 1]!;
// CSI navigation remnants: [A-F (arrows/home/end), [H, [Z (shift-tab), [<n>~ (func keys)
if (/^[A-FHZ]$/.test(last) || last === "~") {
return; // Drop CSI remnant (e.g. "[D", "[C", "[5~")
}
}
this.insertCharacter(data);
}
}
@ -2055,6 +2066,10 @@ https://github.com/EsotericSoftware/spine-runtimes/actions/runs/19536643416/job/
this.lastAutocompleteLookupPrefix = null;
}
public dispose(): void {
this.clearAutocompleteDebounce();
}
public isShowingAutocomplete(): boolean {
return this.autocompleteState !== null;
}

View file

@ -23,7 +23,17 @@ export class Input implements Component, Focusable {
public placeholder: string = "";
/** Focusable interface - set by TUI when focus changes */
focused: boolean = false;
private _focused: boolean = false;
get focused(): boolean {
return this._focused;
}
set focused(value: boolean) {
this._focused = value;
if (!value) {
this.isInPaste = false;
this.pasteBuffer = "";
}
}
// Bracketed paste mode buffering
private pasteBuffer: string = "";

View file

@ -26,6 +26,9 @@ export class Loader extends Text {
}
start() {
if (this.intervalId) {
clearInterval(this.intervalId);
}
this.updateDisplay();
this.intervalId = setInterval(() => {
this.currentFrame = (this.currentFrame + 1) % this.frames.length;
@ -40,6 +43,11 @@ export class Loader extends Text {
}
}
/** Stop the spinner interval and release the TUI reference so no further
 * renders can be scheduled after teardown. */
dispose() {
  this.stop();
  this.ui = null;
}
setMessage(message: string) {
this.message = message;
this.updateDisplay();

View file

@ -112,7 +112,10 @@ export class ProcessTerminal implements Terminal {
* to handle the case where the response arrives split across multiple events.
*/
private setupStdinBuffer(): void {
this.stdinBuffer = new StdinBuffer({ timeout: 10 });
// 50ms matches xterm's default escapeCodeTimeout and gives enough headroom
// for escape sequences that arrive split across multiple stdin data events
// (e.g. \x1b arriving separately from [D due to event loop latency).
this.stdinBuffer = new StdinBuffer({ timeout: 50 });
// Kitty protocol response pattern: \x1b[?<flags>u
const kittyResponsePattern = /^\x1b\[\?(\d+)u$/;

View file

@ -441,6 +441,15 @@ export class TUI extends Container {
stop(): void {
this.stopped = true;
// Dispose all overlays to stop any running timers
for (const entry of this.overlayStack) {
if ("dispose" in entry.component && typeof (entry.component as any).dispose === "function") {
(entry.component as any).dispose();
}
}
this.overlayStack = [];
// Move cursor to the end of the content to prevent overwriting/artifacts on exit
if (this.previousLines.length > 0) {
const targetRow = this.previousLines.length; // Line after the last content

View file

@ -17,6 +17,7 @@ import { ensureManagedTools } from './tool-bootstrap.js'
import { loadStoredEnvKeys } from './wizard.js'
import { getPiDefaultModelAndProvider, migratePiCredentials } from './pi-migration.js'
import { shouldRunOnboarding, runOnboarding } from './onboarding.js'
import chalk from 'chalk'
import { checkForUpdates } from './update-check.js'
// ---------------------------------------------------------------------------
@ -42,15 +43,10 @@ function exitIfManagedResourcesAreNewer(currentAgentDir: string): void {
return
}
const yellow = '\x1b[33m'
const dim = '\x1b[2m'
const reset = '\x1b[0m'
const bold = '\x1b[1m'
process.stderr.write(
`[gsd] ${yellow}Version mismatch detected${reset}\n` +
`[gsd] Synced resources are from ${bold}v${managedVersion}${reset}, but this \`gsd\` binary is ${dim}v${currentVersion}${reset}.\n` +
`[gsd] Run ${bold}npm install -g gsd-pi@latest${reset} or ${bold}gsd update${reset}, then try again.\n`,
`[gsd] ${chalk.yellow('Version mismatch detected')}\n` +
`[gsd] Synced resources are from ${chalk.bold(`v${managedVersion}`)}, but this \`gsd\` binary is ${chalk.dim(`v${currentVersion}`)}.\n` +
`[gsd] Run ${chalk.bold('npm install -g gsd-pi@latest')} or ${chalk.bold('gsd update')}, then try again.\n`,
)
process.exit(1)
}
@ -137,6 +133,15 @@ migratePiCredentials(authStorage)
// Run onboarding wizard on first launch (no LLM provider configured)
if (!isPrintMode && shouldRunOnboarding(authStorage)) {
await runOnboarding(authStorage)
// Clean up stdin state left by @clack/prompts.
// readline.emitKeypressEvents() adds a permanent data listener and
// readline.createInterface() may leave stdin paused. Remove stale
// listeners and pause stdin so the TUI can start with a clean slate.
process.stdin.removeAllListeners('data')
process.stdin.removeAllListeners('keypress')
if (process.stdin.setRawMode) process.stdin.setRawMode(false)
process.stdin.pause()
}
// Non-blocking update check — runs at most once per 24h, fire-and-forget
@ -144,6 +149,13 @@ if (!isPrintMode) {
checkForUpdates().catch(() => {})
}
// Warn if terminal is too narrow for readable output
if (!isPrintMode && process.stdout.columns && process.stdout.columns < 40) {
process.stderr.write(
chalk.yellow(`[gsd] Terminal width is ${process.stdout.columns} columns (minimum recommended: 40). Output may be unreadable.\n`),
)
}
const modelRegistry = new ModelRegistry(authStorage)
const settingsManager = SettingsManager.create(agentDir)

View file

@ -22,6 +22,7 @@ const resourceVersionManifestName = 'managed-resources.json'
interface ManagedResourceManifest {
gsdVersion: string
syncedAt?: number
}
function isExtensionFile(name: string): boolean {
@ -102,7 +103,7 @@ function getBundledGsdVersion(): string {
}
function writeManagedResourceManifest(agentDir: string): void {
const manifest: ManagedResourceManifest = { gsdVersion: getBundledGsdVersion() }
const manifest: ManagedResourceManifest = { gsdVersion: getBundledGsdVersion(), syncedAt: Date.now() }
writeFileSync(getManagedResourceManifestPath(agentDir), JSON.stringify(manifest))
}
@ -115,6 +116,15 @@ export function readManagedResourceVersion(agentDir: string): string | null {
}
}
export function readManagedResourceSyncedAt(agentDir: string): number | null {
try {
const manifest = JSON.parse(readFileSync(getManagedResourceManifestPath(agentDir), 'utf-8')) as ManagedResourceManifest
return typeof manifest?.syncedAt === 'number' ? manifest.syncedAt : null
} catch {
return null
}
}
export function getNewerManagedResourceVersion(agentDir: string, currentVersion: string): string | null {
const managedVersion = readManagedResourceVersion(agentDir)
if (!managedVersion) {

View file

@ -4,8 +4,8 @@
>
> **When to read this:** At the start of any session working on GSD-managed work, or when loaded by `/gsd`.
>
> **After reading this, always read `.gsd/state.md` to find out what's next.**
> If the milestone has a `context.md`, read that too — it contains project-specific decisions, reference paths, and implementation guidance that this generic methodology doc does not.
> **After reading this, always read `.gsd/STATE.md` to find out what's next.**
> If the milestone has a `M###-CONTEXT.md`, read that too. If the active slice has an `S##-CONTEXT.md`, read that as well — these files contain project-specific decisions, reference paths, and implementation guidance that this generic methodology doc does not.
---
@ -13,13 +13,14 @@
Read these files in order and act on what they say:
1. **`.gsd/state.md`** — Where are we? What's the next action?
2. **`.gsd/milestones/<active>/roadmap.md`** — What's the plan? Which slices are done? (state.md tells you which milestone is active)
3. **`.gsd/milestones/<active>/context.md`** — Project-specific decisions, reference paths, constraints. Read this before doing implementation work.
4. If a slice is active, read its **`plan.md`** — Which tasks exist? Which are done?
5. If a task was interrupted, check for **`continue.md`** in the active slice directory — Resume from there.
1. **`.gsd/STATE.md`** — Where are we? What's the next action?
2. **`.gsd/milestones/<active>/M###-ROADMAP.md`** — What's the plan? Which slices are done? (`STATE.md` tells you which milestone is active)
3. **`.gsd/milestones/<active>/M###-CONTEXT.md`** — Milestone-level project decisions, reference paths, constraints. Read this before doing implementation work.
4. If a slice is active and has one, read **`S##-CONTEXT.md`** — Slice-specific decisions and constraints.
5. If a slice is active, read its **`S##-PLAN.md`** — Which tasks exist? Which are done?
6. If a task was interrupted, check for **`continue.md`** in the active slice directory — Resume from there.
Then do the thing `state.md` says to do next.
Then do the thing `STATE.md` says to do next.
---
@ -41,32 +42,32 @@ All artifacts live in `.gsd/` at the project root:
```
.gsd/
state.md # Dashboard — always read first
decisions.md # Append-only decisions register
STATE.md # Dashboard — always read first (derived cache; runtime, gitignored)
DECISIONS.md # Append-only decisions register
milestones/
M001/
roadmap.md # Milestone plan (checkboxes = state)
context.md # Optional: user decisions from discuss phase
research.md # Optional: codebase/tech research
summary.md # Milestone rollup (updated as slices complete)
M001-ROADMAP.md # Milestone plan (checkboxes = state)
M001-CONTEXT.md # Optional: user decisions from discuss phase
M001-RESEARCH.md # Optional: codebase/tech research
M001-SUMMARY.md # Milestone rollup (updated as slices complete)
slices/
S01/
plan.md # Task decomposition for this slice
context.md # Optional: slice-level user decisions
research.md # Optional: slice-level research
summary.md # Slice summary (written on completion)
uat.md # Non-blocking human test script (written on completion)
S01-PLAN.md # Task decomposition for this slice
S01-CONTEXT.md # Optional: slice-level user decisions
S01-RESEARCH.md # Optional: slice-level research
S01-SUMMARY.md # Slice summary (written on completion)
S01-UAT.md # Non-blocking human test script (written on completion)
continue.md # Ephemeral: resume point if interrupted
tasks/
T01-plan.md # Individual task plan
T01-summary.md # Task summary with frontmatter
T01-PLAN.md # Individual task plan
T01-SUMMARY.md # Task summary with frontmatter
```
---
## File Format Reference
### `roadmap.md`
### `M###-ROADMAP.md`
```markdown
# M001: Title of the Milestone
@ -93,7 +94,7 @@ All artifacts live in `.gsd/` at the project root:
**Parsing rules:** `- [x]` = done, `- [ ]` = not done. The `risk:` and `depends:[]` tags are inline metadata parsed from the line. `depends:[]` lists slice IDs this slice requires to be complete first.
**Boundary Map** (required section in roadmap.md):
**Boundary Map** (required section in M###-ROADMAP.md):
After the slices section, include a `## Boundary Map` that shows what each slice produces and consumes:
@ -123,7 +124,7 @@ The boundary map is a **planning artifact** — not runnable code. It:
- Enables deterministic verification that slices actually connect
- Gets updated during slice planning if new interfaces emerge
### `plan.md` (slice-level)
### `S##-PLAN.md` (slice-level)
```markdown
# S01: Slice Title
@ -148,7 +149,7 @@ The boundary map is a **planning artifact** — not runnable code. It:
- path/to/another.ts
```
### `TNN-plan.md` (task-level)
### `T##-PLAN.md` (task-level)
```markdown
# T01: Task Title
@ -188,7 +189,7 @@ Critical wiring between artifacts:
**Must-haves are what make verification mechanically checkable.** Truths are checked by running commands or reading output. Artifacts are checked by confirming files exist with real content. Key links are checked by confirming imports/references actually connect the pieces.
### `state.md`
### `STATE.md`
```markdown
# GSD State
@ -209,10 +210,10 @@ Critical wiring between artifacts:
Exact next thing to do.
```
### `context.md` (from discuss phase)
### `M###-CONTEXT.md` / `S##-CONTEXT.md` (from discuss phase)
```markdown
# S01: Slice Title — Context
# M001: Milestone or Slice Title — Context
**Gathered:** 2026-03-07
**Status:** Ready for planning
@ -228,7 +229,7 @@ Exact next thing to do.
- Ideas that came up but belong in other slices
```
### `decisions.md` (append-only register)
### `DECISIONS.md` (append-only register)
```markdown
# Decisions Register
@ -265,7 +266,7 @@ Work flows through these phases. Each phase produces a file.
### Phase 1: Discuss (Optional)
**Purpose:** Capture user decisions on gray areas before planning.
**Produces:** `context.md` at milestone or slice level.
**Produces:** `M###-CONTEXT.md` for milestone-level discussion or `S##-CONTEXT.md` for slice-level discussion.
**When to use:** When the scope has ambiguities the user should weigh in on.
**When to skip:** When the user already knows exactly what they want, or told you to just go.
@ -273,18 +274,18 @@ Work flows through these phases. Each phase produces a file.
1. Read the roadmap to understand the scope.
2. Identify 3-5 gray areas — implementation decisions the user cares about.
3. Use `ask_user_questions` to discuss each area.
4. Write decisions to `context.md`.
4. Write decisions to the appropriate context file (`M###-CONTEXT.md` or `S##-CONTEXT.md`).
5. Do NOT discuss how to implement — only what the user wants.
### Phase 2: Research (Optional)
**Purpose:** Scout the codebase and relevant docs before planning.
**Produces:** `research.md` at milestone or slice level.
**Produces:** `M###-RESEARCH.md` at milestone level or `S##-RESEARCH.md` at slice level.
**When to use:** When working in unfamiliar code, with unfamiliar libraries, or on complex integrations.
**When to skip:** When the codebase is familiar and the work is straightforward.
**How to do it manually:**
1. Read `context.md` if it exists — know what decisions are locked.
1. Read `M###-CONTEXT.md` and/or `S##-CONTEXT.md` if they exist — know what decisions are locked.
2. Scout relevant code: `rg`, `find`, read key files.
3. Use `resolve_library` / `get_library_docs` if needed.
4. Write findings to `research.md` with these sections:
@ -324,24 +325,24 @@ The **Don't Hand-Roll** and **Common Pitfalls** sections prevent the most expens
### Phase 3: Plan
**Purpose:** Decompose work into context-window-sized tasks with must-haves.
**Produces:** `plan.md` + individual `T01-plan.md` files.
**Produces:** `S##-PLAN.md` + individual `T01-PLAN.md` files.
**For a milestone (roadmap):**
1. Read `context.md`, `research.md`, and `.gsd/decisions.md` if they exist.
1. Read `M###-CONTEXT.md`, `M###-RESEARCH.md`, and `.gsd/DECISIONS.md` if they exist.
2. Decompose the vision into 4-10 demoable vertical slices.
3. Order by risk (high-risk first to validate feasibility early).
4. Write `roadmap.md` with checkboxes, risk levels, dependencies, demo sentences.
4. Write `M###-ROADMAP.md` with checkboxes, risk levels, dependencies, demo sentences.
5. **Write the boundary map** — for each slice, specify what it produces (functions, types, interfaces, endpoints) and what it consumes from upstream slices. This forces interface thinking before implementation and enables deterministic verification that slices actually connect.
**For a slice (task decomposition):**
1. Read the slice's entry in `roadmap.md` **and its boundary map section** — know what interfaces this slice must produce and consume.
2. Read `context.md`, `research.md`, and `.gsd/decisions.md` if they exist for this slice.
1. Read the slice's entry in `M###-ROADMAP.md` **and its boundary map section** — know what interfaces this slice must produce and consume.
2. Read `M###-CONTEXT.md`, `S##-CONTEXT.md`, `M###-RESEARCH.md`, `S##-RESEARCH.md`, and `.gsd/DECISIONS.md` if they exist for this slice.
3. Read summaries from dependency slices (check `depends:[]` in roadmap).
4. Verify that upstream slices' actual outputs match what the boundary map says this slice consumes. If they diverge, update the boundary map.
5. Decompose into 1-7 tasks, each fitting one context window.
6. Each task needs: title, description, steps (3-10), must-haves (observable verification criteria).
7. Must-haves should reference boundary map contracts — e.g. "exports `generateToken()` as specified in boundary map S01→S02".
8. Write `plan.md` and individual `TNN-plan.md` files.
8. Write `S##-PLAN.md` and individual `T##-PLAN.md` files.
### Phase 4: Execute
@ -349,10 +350,10 @@ The **Don't Hand-Roll** and **Common Pitfalls** sections prevent the most expens
**Produces:** Code changes + `[DONE:n]` markers.
**How to do it manually:**
1. Read the task's `TNN-plan.md`.
1. Read the task's `T##-PLAN.md`.
2. Read relevant summaries from prior tasks (for context on what's already built).
3. Execute each step. Mark progress with `[DONE:n]` in responses.
4. If you made an architectural, pattern, or library decision, append it to `.gsd/decisions.md`.
4. If you made an architectural, pattern, or library decision, append it to `.gsd/DECISIONS.md`.
5. If interrupted or context is getting full, write `continue.md` (see below).
### Phase 5: Verify
@ -400,7 +401,7 @@ When verification finds gaps, include a **Gaps** section with what's missing, im
### Phase 6: Summarize
**Purpose:** Record what happened for downstream tasks.
**Produces:** `TNN-summary.md`, and when slice completes, `summary.md`.
**Produces:** `T##-SUMMARY.md`, and when slice completes, `S##-SUMMARY.md`.
**Task summary format:**
```markdown
@ -421,7 +422,7 @@ key_decisions:
patterns_established:
- "Pattern name and where it lives"
drill_down_paths:
- .gsd/milestones/M001/slices/S01/tasks/T01-plan.md
- .gsd/milestones/M001/slices/S01/tasks/T01-PLAN.md
duration: 15min
verification_result: pass
completed_at: 2026-03-07T16:00:00Z
@ -445,7 +446,7 @@ What differed from the plan and why (or "None").
The one-liner must be substantive: "JWT auth with refresh rotation using jose" not "Authentication implemented."
**Slice summary:** Written when all tasks in a slice complete. Compresses all task summaries. Includes `drill_down_paths` to each task summary. During slice completion, review task summaries for `key_decisions` and ensure any significant ones are captured in `.gsd/decisions.md`.
**Slice summary:** Written when all tasks in a slice complete. Compresses all task summaries. Includes `drill_down_paths` to each task summary. During slice completion, review task summaries for `key_decisions` and ensure any significant ones are captured in `.gsd/DECISIONS.md`.
**Milestone summary:** Updated each time a slice completes. Compresses all slice summaries. This is what gets injected into later slice planning instead of loading many individual summaries.
@ -454,16 +455,16 @@ The one-liner must be substantive: "JWT auth with refresh rotation using jose" n
**Purpose:** Mark work done and move to the next thing.
**After a task completes:**
1. Mark the task done in `plan.md` (checkbox).
1. Mark the task done in `S##-PLAN.md` (checkbox).
2. Check if there's a next task in the slice → execute it.
3. If slice is complete → write slice summary, mark slice done in `roadmap.md`.
3. If slice is complete → write slice summary, mark slice done in `M###-ROADMAP.md`.
**After a slice completes:**
1. Write slice `summary.md` (compresses all task summaries).
2. Write slice `uat.md` — a non-blocking human test script derived from the slice's must-haves and demo sentence. The agent does NOT wait for UAT results.
3. Mark the slice checkbox in `roadmap.md` as `[x]`.
4. Update `state.md` with new position.
5. Update milestone `summary.md` with the completed slice's contributions.
1. Write slice `S##-SUMMARY.md` (compresses all task summaries).
2. Write slice `S##-UAT.md` — a non-blocking human test script derived from the slice's must-haves and demo sentence. The agent does NOT wait for UAT results.
3. Mark the slice checkbox in `M###-ROADMAP.md` as `[x]`.
4. Update `STATE.md` with new position.
5. Update milestone `M###-SUMMARY.md` with the completed slice's contributions.
6. Continue to next slice immediately. The user tests the UAT whenever convenient.
7. If the user reports UAT failures later, create fix tasks in the current or a new slice.
8. If all slices done → milestone complete.
@ -513,17 +514,17 @@ The EXACT first thing to do when resuming. Not vague. Specific.
## State Management
### `state.md` is a derived cache
### `STATE.md` is a derived cache
It is NOT the source of truth. It's a convenience dashboard.
**Sources of truth:**
- `roadmap.md` → which slices exist and which are done
- `plan.md` → which tasks exist within a slice
- `TNN-summary.md` → what happened during a task
- `summary.md` (slice/milestone) → compressed outcomes
- `M###-ROADMAP.md` → which slices exist and which are done
- `S##-PLAN.md` → which tasks exist within a slice
- `T##-SUMMARY.md` → what happened during a task
- `S##-SUMMARY.md` and `M###-SUMMARY.md` → compressed slice and milestone outcomes
**Update `state.md`** after every significant action:
**Update `STATE.md`** after every significant action:
- Active milestone/slice/task
- Recent decisions (last 3-5)
- Blockers
@ -611,9 +612,9 @@ Tasks completed:
When planning or executing a task, load relevant prior context:
1. Check the current slice's `depends:[]` in `roadmap.md`.
1. Check the current slice's `depends:[]` in `M###-ROADMAP.md`.
2. Load summaries from those dependency slices.
3. Start with the **highest available level** — milestone `summary.md` first.
3. Start with the **highest available level** — milestone `M###-SUMMARY.md` first.
4. Only drill down to slice/task summaries if you need specific detail.
5. Stay within **~2500 tokens** of total injected summary context.
6. If the dependency chain is too large, drop the oldest/least-relevant summaries first.
@ -630,32 +631,33 @@ These are soft caps — exceed them when genuinely needed, but don't let summari
## Project-Specific Context
This methodology doc is generic. Project-specific guidance belongs in the milestone's `context.md`:
This methodology doc is generic. Project-specific guidance belongs in the milestone and slice context files:
- **`.gsd/milestones/<active>/context.md`** — Architecture decisions, reference file paths, per-slice doc reading guides, implementation constraints, and any project-specific protocols (worktrees, testing, etc.)
- **`.gsd/milestones/<active>/M###-CONTEXT.md`** — milestone-level architecture decisions, reference file paths, and implementation constraints
- **`.gsd/milestones/<active>/slices/S##/S##-CONTEXT.md`** — slice-level decisions, edge cases, and narrow implementation guidance when present
**Always read the active milestone's `context.md` before starting implementation work.** It tells you what decisions are locked, what files to reference, and how to verify your work in this specific project.
**Always read the active milestone's `M###-CONTEXT.md` before starting implementation work.** If the active slice also has `S##-CONTEXT.md`, read that too. These files tell you what decisions are locked, what files to reference, and how to verify your work in this specific project.
---
## Checklist for a Fresh Session
1. Read `.gsd/state.md` — what's the next action?
1. Read `.gsd/STATE.md` — what's the next action?
2. Check for `continue.md` in the active slice — is there interrupted work?
3. If resuming: read `continue.md`, delete it, pick up from "Next Action".
4. If starting fresh: read the active slice's `plan.md`, find the next incomplete task.
5. If in a planning or research phase, read `.gsd/decisions.md` — respect existing decisions.
4. If starting fresh: read the active slice's `S##-PLAN.md`, find the next incomplete task.
5. If in a planning or research phase, read `.gsd/DECISIONS.md` — respect existing decisions.
6. Read relevant summaries from prior tasks/slices for context.
7. Do the work.
8. Verify the must-haves.
9. Write the summary.
10. Mark done, update `state.md`, advance.
11. If context is getting full or you're done for now: write `continue.md` if mid-task, or update `state.md` with next action if between tasks.
10. Mark done, update `STATE.md`, advance.
11. If context is getting full or you're done for now: write `continue.md` if mid-task, or update `STATE.md` with next action if between tasks.
## When Context Gets Large
If you sense context pressure (many files read, long execution, lots of tool output):
1. **If mid-task:** Write `continue.md` with exact resume state. Tell the user: "Context is getting full. I've saved progress to continue.md. Start a new session and run `/gsd` to pick up where you left off, or `/gsd auto` to resume in auto-execution mode."
2. **If between tasks:** Just update `state.md` with the next action. No continue file needed — the next session will read state.md and pick up the next task cleanly.
2. **If between tasks:** Just update `STATE.md` with the next action. No continue file needed — the next session will read STATE.md and pick up the next task cleanly.
3. **Don't fight it.** The whole system is designed for this. A fresh session with the right files loaded is better than a stale session with degraded reasoning.

View file

@ -71,7 +71,7 @@ export function createAsyncBashTool(
"Check /jobs to see all running and recent background jobs.",
],
parameters: schema,
async execute(_toolCallId, params) {
async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
const manager = getManager();
const cwd = getCwd();
const { command, timeout, label } = params;
@ -91,6 +91,7 @@ export function createAsyncBashTool(
"Use `await_job` to get results when ready, or `cancel_job` to stop.",
].join("\n"),
}],
details: undefined,
};
},
};

View file

@ -24,7 +24,7 @@ export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefiniti
description:
"Wait for background jobs to complete. Provide specific job IDs or omit to wait for the next job that finishes. Returns results of completed jobs.",
parameters: schema,
async execute(_toolCallId, params) {
async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
const manager = getManager();
const { jobs: jobIds } = params;
@ -43,6 +43,7 @@ export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefiniti
if (notFound.length > 0 && watched.length === 0) {
return {
content: [{ type: "text", text: `No jobs found: ${notFound.join(", ")}` }],
details: undefined,
};
}
} else {
@ -50,6 +51,7 @@ export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefiniti
if (watched.length === 0) {
return {
content: [{ type: "text", text: "No running background jobs." }],
details: undefined,
};
}
}
@ -59,7 +61,7 @@ export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefiniti
if (running.length === 0) {
const result = formatResults(watched);
manager.acknowledgeDeliveries(watched.map((j) => j.id));
return { content: [{ type: "text", text: result }] };
return { content: [{ type: "text", text: result }], details: undefined };
}
// Wait for at least one to complete
@ -75,7 +77,7 @@ export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefiniti
result += `\n\n**Still running:** ${stillRunning.map((j) => `${j.id} (${j.label})`).join(", ")}`;
}
return { content: [{ type: "text", text: result }] };
return { content: [{ type: "text", text: result }], details: undefined };
},
};
}

View file

@ -16,7 +16,7 @@ export function createCancelJobTool(getManager: () => AsyncJobManager): ToolDefi
label: "Cancel Background Job",
description: "Cancel a running background job by its ID.",
parameters: schema,
async execute(_toolCallId, params) {
async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
const manager = getManager();
const result = manager.cancel(params.job_id);
@ -28,6 +28,7 @@ export function createCancelJobTool(getManager: () => AsyncJobManager): ToolDefi
return {
content: [{ type: "text", text: messages[result] ?? `Unknown result: ${result}` }],
details: undefined,
};
},
};

View file

@ -62,7 +62,7 @@ export default function AsyncJobs(pi: ExtensionAPI) {
"",
truncatedOutput,
].join("\n"),
display: `Background job ${job.id} ${job.status}`,
display: true,
},
{ deliverAs: "followUp", triggerTurn: true },
);
@ -92,7 +92,7 @@ export default function AsyncJobs(pi: ExtensionAPI) {
pi.sendMessage({
customType: "async_jobs_list",
content: "No async job manager active.",
display: "No jobs",
display: true,
});
return;
}
@ -126,7 +126,7 @@ export default function AsyncJobs(pi: ExtensionAPI) {
pi.sendMessage({
customType: "async_jobs_list",
content: lines.join("\n"),
display: `${running.length} running, ${completed.length} recent`,
display: true,
});
},
});

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,198 @@
/**
* Expect-style interactions: send_and_wait, run on session, query shell environment.
*/
import { randomUUID } from "node:crypto";
import type { BgProcess } from "./types.js";
// ── Query Shell Environment ────────────────────────────────────────────────
/**
 * Probe a background shell session for its working directory, shell binary,
 * and a fixed set of well-known environment variables.
 *
 * Writes a chain of `echo` commands bracketed by a per-call sentinel pair to
 * the process stdin, then polls the unified output buffer until the end
 * sentinel shows up, the caller aborts, the process dies, or `timeout`
 * elapses.
 *
 * @param bg       Background process whose shell is queried.
 * @param timeout  Maximum time to wait for the sentinel output, in ms.
 * @param signal   Optional abort signal that cancels the poll early.
 * @returns Parsed `{ cwd, env, shell }`, or `null` on abort/death/timeout.
 */
export async function queryShellEnv(
  bg: BgProcess,
  timeout: number,
  signal?: AbortSignal,
): Promise<{ cwd: string; env: Record<string, string>; shell: string } | null> {
  // Unique per call so stale output from an earlier probe can never match.
  const sentinel = `__GSD_ENV_${randomUUID().slice(0, 8)}__`;
  // Only lines appended after this point belong to this probe.
  const snapshotLen = bg.output.length;

  const probe = [
    `echo "${sentinel}_START"`,
    `echo "CWD=$(pwd)"`,
    `echo "SHELL=$SHELL"`,
    `echo "PATH=$PATH"`,
    `echo "VIRTUAL_ENV=$VIRTUAL_ENV"`,
    `echo "NODE_ENV=$NODE_ENV"`,
    `echo "HOME=$HOME"`,
    `echo "USER=$USER"`,
    `echo "NVM_DIR=$NVM_DIR"`,
    `echo "GOPATH=$GOPATH"`,
    `echo "CARGO_HOME=$CARGO_HOME"`,
    `echo "PYTHONPATH=$PYTHONPATH"`,
    `echo "${sentinel}_END"`,
  ].join(" && ");
  bg.proc.stdin?.write(probe + "\n");

  const deadline = Date.now() + timeout;
  while (Date.now() < deadline) {
    // Abort and process-death both yield null: there is nothing to parse.
    if (signal?.aborted) return null;
    if (!bg.alive) return null;

    const fresh = bg.output.slice(snapshotLen);
    const endAt = fresh.findIndex(e => e.line.includes(`${sentinel}_END`));
    if (endAt >= 0) {
      const startAt = fresh.findIndex(e => e.line.includes(`${sentinel}_START`));
      if (startAt >= 0) {
        // Parse KEY=value lines between the two markers. CWD and SHELL are
        // pulled out; the rest go into env, skipping empty values.
        // NOTE(review): assumes values contain no newlines — a multi-line
        // value would break this line-based parse; confirm acceptable.
        const env: Record<string, string> = {};
        let cwd = "";
        let shell = "";
        for (const { line } of fresh.slice(startAt + 1, endAt)) {
          const parsed = line.match(/^([A-Z_]+)=(.*)$/);
          if (!parsed) continue;
          const [, key, value] = parsed;
          if (key === "CWD") {
            cwd = value;
          } else if (key === "SHELL") {
            shell = value;
          } else if (value) {
            env[key] = value;
          }
        }
        return { cwd, env, shell };
      }
    }
    await new Promise(r => setTimeout(r, 100));
  }
  return null;
}
// ── Send and Wait ──────────────────────────────────────────────────────────
/**
 * Send one line of input to a background process and poll its output buffer
 * until a line matches `waitPattern` (case-insensitive regex) or `timeout`
 * milliseconds elapse.
 *
 * @param bg           Background process to interact with.
 * @param input        Text written to stdin (a newline is appended).
 * @param waitPattern  Regex source matched against each new output line.
 * @param timeout      Maximum wait in ms.
 * @param signal       Optional abort signal; cancels the poll early.
 * @returns `matched` plus all output produced since `input` was sent.
 */
export async function sendAndWait(
  bg: BgProcess,
  input: string,
  waitPattern: string,
  timeout: number,
  signal?: AbortSignal,
): Promise<{ matched: boolean; output: string }> {
  // Remember where the unified buffer ends before sending, so only lines
  // produced after the input count as the response.
  const mark = bg.output.length;
  bg.proc.stdin?.write(input + "\n");

  let matcher: RegExp;
  try {
    matcher = new RegExp(waitPattern, "i");
  } catch {
    return { matched: false, output: "Invalid wait pattern regex" };
  }

  // Fresh lines accumulated since the snapshot, as plain strings.
  const collect = () => bg.output.slice(mark).map(e => e.line);

  const deadline = Date.now() + timeout;
  while (Date.now() < deadline) {
    if (signal?.aborted) {
      return { matched: false, output: collect().join("\n") || "(cancelled)" };
    }
    const lines = collect();
    if (lines.some(l => matcher.test(l))) {
      return { matched: true, output: lines.join("\n") };
    }
    await new Promise(r => setTimeout(r, 100));
  }
  return { matched: false, output: collect().join("\n") || "(no output)" };
}
// ── Run on Session ─────────────────────────────────────────────────────────
/**
 * Run a one-shot command inside an already-running background shell session
 * and capture its output and exit code.
 *
 * The command is wrapped between unique START/END sentinel echoes; the exit
 * code of `command` is captured into a per-call shell variable and echoed on
 * the END sentinel line, then parsed back out here.
 *
 * Assumes the session is a POSIX-like shell (`$?`, `name=value` assignment).
 * NOTE(review): if the shell echoes its own input (e.g. `set -v` or a PTY
 * with echo on), the wrapped command text itself contains the END marker and
 * could match prematurely — confirm sessions are started without input echo.
 *
 * @param bg       Background process hosting the shell session.
 * @param command  Command text to execute in the session.
 * @param timeout  Maximum wait in ms before giving up.
 * @param signal   Optional abort signal; cancels the wait early.
 * @returns Exit code (-1 on abort/timeout/parse failure), captured output,
 *          and whether the wait timed out.
 */
export async function runOnSession(
  bg: BgProcess,
  command: string,
  timeout: number,
  signal?: AbortSignal,
): Promise<{ exitCode: number; output: string; timedOut: boolean }> {
  // Per-call sentinel keeps concurrent/stale output from matching.
  const sentinel = randomUUID().slice(0, 8);
  const startMarker = `__GSD_SENTINEL_${sentinel}_START__`;
  const endMarker = `__GSD_SENTINEL_${sentinel}_END__`;
  const exitVar = `__GSD_EXIT_${sentinel}__`;

  // Snapshot current output buffer position
  const startIndex = bg.output.length;

  // Write the sentinel-wrapped command to stdin
  // `$?` must be captured immediately after `command` runs, before any other
  // statement can clobber it; the END echo then carries the code out-of-band.
  const wrappedCommand = [
    `echo ${startMarker}`,
    command,
    `${exitVar}=$?`,
    `echo ${endMarker} $${exitVar}`,
  ].join("\n");
  bg.proc.stdin?.write(wrappedCommand + "\n");

  const start = Date.now();
  while (Date.now() - start < timeout) {
    if (signal?.aborted) {
      const newEntries = bg.output.slice(startIndex);
      return { exitCode: -1, output: newEntries.map(e => e.line).join("\n") || "(cancelled)", timedOut: false };
    }

    // Process died while waiting
    if (!bg.alive) {
      const newEntries = bg.output.slice(startIndex);
      const lines = newEntries.map(e => e.line);
      return { exitCode: bg.proc.exitCode ?? -1, output: lines.join("\n") || "(process exited)", timedOut: false };
    }

    const newEntries = bg.output.slice(startIndex);
    for (let i = 0; i < newEntries.length; i++) {
      if (newEntries[i].line.includes(endMarker)) {
        // Parse exit code from the END sentinel line
        // (marker is hex+underscores only, so it is regex-safe as-is)
        const endLine = newEntries[i].line;
        const exitMatch = endLine.match(new RegExp(`${endMarker}\\s+(\\d+)`));
        const exitCode = exitMatch ? parseInt(exitMatch[1], 10) : -1;

        // Extract output between START and END sentinels
        const outputLines: string[] = [];
        let capturing = false;
        for (let j = 0; j < newEntries.length; j++) {
          if (newEntries[j].line.includes(startMarker)) {
            capturing = true;
            continue;
          }
          if (newEntries[j].line.includes(endMarker)) {
            break;
          }
          if (capturing) {
            outputLines.push(newEntries[j].line);
          }
        }

        return { exitCode, output: outputLines.join("\n"), timedOut: false };
      }
    }

    await new Promise(r => setTimeout(r, 100));
  }

  // Timed out
  // Best-effort: return whatever arrived after the START marker so the
  // caller still sees partial output from a long-running command.
  const newEntries = bg.output.slice(startIndex);
  const outputLines: string[] = [];
  let capturing = false;
  for (const entry of newEntries) {
    if (entry.line.includes(startMarker)) {
      capturing = true;
      continue;
    }
    if (capturing) {
      outputLines.push(entry.line);
    }
  }
  return { exitCode: -1, output: outputLines.join("\n") || "(no output)", timedOut: true };
}

View file

@ -0,0 +1,259 @@
/**
* Output analysis, digest generation, highlights extraction, and output retrieval.
*/
import {
truncateHead,
DEFAULT_MAX_BYTES,
DEFAULT_MAX_LINES,
} from "@gsd/pi-coding-agent";
import type { BgProcess, OutputDigest, OutputLine, GetOutputOptions } from "./types.js";
import {
ERROR_PATTERNS,
WARNING_PATTERNS,
URL_PATTERN,
PORT_PATTERN,
READINESS_PATTERNS,
BUILD_COMPLETE_PATTERNS,
TEST_RESULT_PATTERNS,
} from "./types.js";
import { addEvent, pushAlert } from "./process-manager.js";
import { transitionToReady } from "./readiness-detector.js";
import { formatUptime, formatTimeAgo } from "./utilities.js";
// ── Output Analysis ────────────────────────────────────────────────────────
/**
 * Incrementally analyze one line of process output, updating the process's
 * derived state: recent errors/warnings (each capped at 50 entries), known
 * URLs and ports, readiness/error status transitions, and dedup counters.
 *
 * Ordering matters: error detection may flip status ready→error before the
 * readiness/recovery checks below can flip it back on a later line.
 *
 * NOTE(review): the `stream` parameter is accepted but never read in this
 * body — confirm whether stderr lines were meant to be weighted differently.
 * NOTE(review): `lineDedup` grows without bound (one entry per distinct
 * 100-char line prefix) — confirm a consumer trims it, else this leaks on
 * long-running chatty processes.
 *
 * @param bg      Process whose derived state is mutated in place.
 * @param line    The raw output line to analyze.
 * @param stream  Which stream the line arrived on (currently unused).
 */
export function analyzeLine(bg: BgProcess, line: string, stream: "stdout" | "stderr"): void {
  // Error detection — only a ready→error transition emits an event/alert,
  // so repeated errors while already in "error" state don't spam alerts.
  if (ERROR_PATTERNS.some(p => p.test(line))) {
    bg.recentErrors.push(line.trim().slice(0, 200)); // Cap line length
    if (bg.recentErrors.length > 50) bg.recentErrors.splice(0, bg.recentErrors.length - 50);
    if (bg.status === "ready") {
      bg.status = "error";
      addEvent(bg, {
        type: "error_detected",
        detail: line.trim().slice(0, 200),
        data: { errorCount: bg.recentErrors.length },
      });
      pushAlert(bg, `error_detected: ${line.trim().slice(0, 120)}`);
    }
  }

  // Warning detection — recorded but never changes status.
  if (WARNING_PATTERNS.some(p => p.test(line))) {
    bg.recentWarnings.push(line.trim().slice(0, 200));
    if (bg.recentWarnings.length > 50) bg.recentWarnings.splice(0, bg.recentWarnings.length - 50);
  }

  // URL extraction — deduplicated append.
  const urlMatches = line.match(URL_PATTERN);
  if (urlMatches) {
    for (const url of urlMatches) {
      if (!bg.urls.includes(url)) {
        bg.urls.push(url);
      }
    }
  }

  // Port extraction — fresh RegExp per line because exec() with a sticky/
  // global pattern keeps lastIndex state across calls.
  let portMatch: RegExpExecArray | null;
  const portRe = new RegExp(PORT_PATTERN.source, PORT_PATTERN.flags);
  while ((portMatch = portRe.exec(line)) !== null) {
    const port = parseInt(portMatch[1], 10);
    if (port > 0 && port <= 65535 && !bg.ports.includes(port)) {
      bg.ports.push(port);
      addEvent(bg, {
        type: "port_open",
        detail: `Port ${port} detected`,
        data: { port },
      });
    }
  }

  // Readiness detection
  if (bg.status === "starting") {
    // Check custom ready pattern first
    if (bg.readyPattern) {
      try {
        if (new RegExp(bg.readyPattern, "i").test(line)) {
          transitionToReady(bg, `Custom pattern matched: ${line.trim().slice(0, 100)}`);
        }
      } catch { /* invalid regex, skip */ }
    }
    // Check built-in readiness patterns
    // (re-check status: the custom pattern above may already have fired)
    if (bg.status === "starting" && READINESS_PATTERNS.some(p => p.test(line))) {
      transitionToReady(bg, `Readiness pattern matched: ${line.trim().slice(0, 100)}`);
    }
  }

  // Recovery detection: if we were in error and see a success pattern
  if (bg.status === "error") {
    if (READINESS_PATTERNS.some(p => p.test(line)) || BUILD_COMPLETE_PATTERNS.some(p => p.test(line))) {
      bg.status = "ready";
      bg.recentErrors = [];
      addEvent(bg, { type: "recovered", detail: "Process recovered from error state" });
      pushAlert(bg, "recovered — errors cleared");
    }
  }

  // Dedup tracking — keyed by 100-char trimmed prefix so near-identical
  // spam lines collapse into one counter.
  bg.totalRawLines++;
  const lineHash = line.trim().slice(0, 100);
  bg.lineDedup.set(lineHash, (bg.lineDedup.get(lineHash) || 0) + 1);
}
// ── Digest Generation ──────────────────────────────────────────────────────
/**
 * Build a compact status digest for a process, summarising what changed
 * since the last snapshot (new lines / errors / warnings).
 *
 * @param bg     Process to summarise.
 * @param mutate When true, advance the error/warning snapshot counters so the
 *               next digest reports deltas relative to this call; passive
 *               renders pass false so they do not consume the deltas.
 */
export function generateDigest(bg: BgProcess, mutate: boolean = false): OutputDigest {
  const errorDelta = bg.recentErrors.length - bg.lastErrorCount;
  const warningDelta = bg.recentWarnings.length - bg.lastWarningCount;
  const lineDelta = bg.output.length - bg.lastReadIndex;
  let changeSummary: string;
  if (lineDelta === 0) {
    changeSummary = "no new output";
  } else {
    const pieces = [`${lineDelta} new lines`];
    if (errorDelta > 0) pieces.push(`${errorDelta} new errors`);
    if (warningDelta > 0) pieces.push(`${warningDelta} new warnings`);
    changeSummary = pieces.join(", ");
  }
  // Only mutate snapshot counters when explicitly requested (e.g. from tool calls)
  if (mutate) {
    bg.lastErrorCount = bg.recentErrors.length;
    bg.lastWarningCount = bg.recentWarnings.length;
  }
  const lastEvent = bg.events[bg.events.length - 1];
  return {
    status: bg.status,
    uptime: formatUptime(Date.now() - bg.startedAt),
    // Last 5 errors / last 3 warnings, matching what the digest consumers show
    errors: bg.recentErrors.slice(-5),
    warnings: bg.recentWarnings.slice(-3),
    urls: bg.urls,
    ports: bg.ports,
    lastActivity: lastEvent !== undefined ? formatTimeAgo(lastEvent.timestamp) : "none",
    outputLines: bg.output.length,
    changeSummary,
  };
}
// ── Highlight Extraction ───────────────────────────────────────────────────
/**
 * Pick the most significant output lines for a quick summary of a process.
 *
 * Lines are scored by pattern category (errors highest), with a small boost
 * for recent lines so highlights favour fresh output. Falls back to the raw
 * buffer tail when nothing scores.
 *
 * @param bg       Process whose output buffer is scanned.
 * @param maxLines Maximum number of highlight lines returned.
 */
export function getHighlights(bg: BgProcess, maxLines: number = 15): string[] {
  const lines: string[] = [];
  // Fix: `RegExp.test` on a /g regex is stateful (lastIndex persists between
  // calls), which made URL scoring skip alternating lines — URL_PATTERN is
  // used with String.match elsewhere to collect multiple URLs, so it carries
  // the global flag. Scan with a non-global clone so every line is tested
  // from position 0. (Hoisted out of the loop; one clone per call.)
  const urlRe = new RegExp(URL_PATTERN.source, URL_PATTERN.flags.replace(/g/g, ""));
  // Collect significant lines
  const significant: { line: string; score: number; idx: number }[] = [];
  for (let i = 0; i < bg.output.length; i++) {
    const entry = bg.output[i];
    let score = 0;
    if (ERROR_PATTERNS.some(p => p.test(entry.line))) score += 10;
    if (WARNING_PATTERNS.some(p => p.test(entry.line))) score += 5;
    if (urlRe.test(entry.line)) score += 3;
    if (READINESS_PATTERNS.some(p => p.test(entry.line))) score += 8;
    if (TEST_RESULT_PATTERNS.some(p => p.test(entry.line))) score += 7;
    if (BUILD_COMPLETE_PATTERNS.some(p => p.test(entry.line))) score += 6;
    // Boost recent lines so highlights favor fresh output over stale
    if (i >= bg.output.length - 50) score += 2;
    if (score > 0) {
      significant.push({ line: entry.line.trim().slice(0, 300), score, idx: i });
    }
  }
  // Sort by significance (tie-break by recency)
  significant.sort((a, b) => b.score - a.score || b.idx - a.idx);
  const top = significant.slice(0, maxLines);
  if (top.length === 0) {
    // If nothing significant, show last few lines
    const tail = bg.output.slice(-5);
    for (const l of tail) lines.push(l.line.trim().slice(0, 300));
  } else {
    for (const entry of top) lines.push(entry.line);
  }
  return lines;
}
// ── Output Retrieval (multi-tier) ──────────────────────────────────────────
/**
 * Retrieve buffered output with optional stream filtering, regex filtering,
 * tailing, and incremental (since-last-read) semantics; the final text is
 * head-truncated to safe size limits for LLM consumption.
 */
export function getOutput(bg: BgProcess, opts: GetOutputOptions): string {
  const { stream, tail, filter, incremental } = opts;
  // The unified buffer is already in chronological order.
  let slice: OutputLine[];
  if (incremental) {
    // NOTE(review): the read cursor advances past the whole buffer even if
    // the stream/regex filters below drop lines — those lines are consumed
    // and will not appear in a later incremental read; confirm intended.
    slice = bg.output.slice(bg.lastReadIndex);
    bg.lastReadIndex = bg.output.length;
  } else {
    slice = [...bg.output];
  }
  if (stream !== "both") {
    slice = slice.filter(e => e.stream === stream);
  }
  if (filter) {
    try {
      const re = new RegExp(filter, "i");
      slice = slice.filter(e => re.test(e.line));
    } catch { /* invalid regex */ }
  }
  if (tail && tail > 0 && slice.length > tail) {
    slice = slice.slice(-tail);
  }
  const joined = slice.map(e => e.line).join("\n");
  const truncation = truncateHead(joined, {
    maxLines: DEFAULT_MAX_LINES,
    maxBytes: DEFAULT_MAX_BYTES,
  });
  let result = truncation.content;
  if (truncation.truncated) {
    result += `\n\n[Output truncated: showing ${truncation.outputLines}/${truncation.totalLines} lines]`;
  }
  return result;
}
// ── Format Digest for LLM ──────────────────────────────────────────────────
/**
 * Render a digest as indented plain text for inclusion in agent context.
 * Optional sections (ports, urls, errors, warnings) appear only when
 * non-empty; the result carries no trailing newline.
 */
export function formatDigestText(bg: BgProcess, digest: OutputDigest): string {
  const parts: string[] = [`Process ${bg.id} (${bg.label}):`];
  parts.push(`  status: ${digest.status}`);
  parts.push(`  type: ${bg.processType}`);
  parts.push(`  uptime: ${digest.uptime}`);
  if (digest.ports.length > 0) parts.push(`  ports: ${digest.ports.join(", ")}`);
  if (digest.urls.length > 0) parts.push(`  urls: ${digest.urls.join(", ")}`);
  parts.push(`  output: ${digest.outputLines} lines`);
  parts.push(`  changes: ${digest.changeSummary}`);
  if (digest.errors.length > 0) {
    parts.push(`  errors (${digest.errors.length}):`);
    for (const err of digest.errors) parts.push(`    - ${err}`);
  }
  if (digest.warnings.length > 0) {
    parts.push(`  warnings (${digest.warnings.length}):`);
    for (const w of digest.warnings) parts.push(`    - ${w}`);
  }
  return parts.join("\n");
}

View file

@ -0,0 +1,432 @@
/**
* TUI: Background Process Manager Overlay.
*/
import type { Theme } from "@gsd/pi-coding-agent";
import { truncateToWidth, visibleWidth, matchesKey, Key } from "@gsd/pi-tui";
import type { BgProcess, ProcessStatus } from "./types.js";
import { ERROR_PATTERNS, WARNING_PATTERNS } from "./types.js";
import { formatUptime, formatTimeAgo } from "./utilities.js";
import {
processes,
killProcess,
cleanupAll,
restartProcess,
} from "./process-manager.js";
/**
 * TUI overlay listing background processes, with per-process output and
 * event views. A 1s timer invalidates the render cache so uptimes and live
 * output stay fresh even without input; all other refreshes happen on
 * user-driven state changes.
 */
export class BgManagerOverlay {
  private tui: { requestRender: () => void };
  private theme: Theme;
  private onClose: () => void;
  // Index of the highlighted row in getProcessList().
  private selected = 0;
  // Active screen: process list, output view, or event view.
  private mode: "list" | "output" | "events" = "list";
  private viewingProcess: BgProcess | null = null;
  private scrollOffset = 0;
  // Render cache — valid only while width and overlay state are unchanged.
  private cachedWidth?: number;
  private cachedLines?: string[];
  private refreshTimer: ReturnType<typeof setInterval>;
  constructor(
    tui: { requestRender: () => void },
    theme: Theme,
    onClose: () => void,
  ) {
    this.tui = tui;
    this.theme = theme;
    this.onClose = onClose;
    // Periodic refresh keeps uptimes/output current with no input.
    this.refreshTimer = setInterval(() => {
      this.invalidate();
      this.tui.requestRender();
    }, 1000);
  }
  private getProcessList(): BgProcess[] {
    return Array.from(processes.values());
  }
  /** Jump straight to the output view of the process at `index`. */
  selectAndView(index: number): void {
    const procs = this.getProcessList();
    if (index >= 0 && index < procs.length) {
      this.selected = index;
      this.viewingProcess = procs[index];
      this.mode = "output";
      this.scrollOffset = Math.max(0, procs[index].output.length - 20);
      // Fix: refresh immediately like every other state-changing path;
      // previously the stale cached frame showed until the 1s timer fired.
      this.invalidate();
      this.tui.requestRender();
    }
  }
  /** Route raw input to the handler for the active screen. */
  handleInput(data: string): void {
    if (this.mode === "output") {
      this.handleOutputInput(data);
      return;
    }
    if (this.mode === "events") {
      this.handleEventsInput(data);
      return;
    }
    this.handleListInput(data);
  }
  // List screen: navigate, open output/events, restart, kill, close.
  private handleListInput(data: string): void {
    const procs = this.getProcessList();
    if (matchesKey(data, Key.escape) || matchesKey(data, Key.ctrl("c")) || matchesKey(data, Key.ctrlAlt("b"))) {
      // Closing the overlay also stops the refresh timer.
      clearInterval(this.refreshTimer);
      this.onClose();
      return;
    }
    if (matchesKey(data, Key.up) || matchesKey(data, "k")) {
      if (this.selected > 0) {
        this.selected--;
        this.invalidate();
        this.tui.requestRender();
      }
      return;
    }
    if (matchesKey(data, Key.down) || matchesKey(data, "j")) {
      if (this.selected < procs.length - 1) {
        this.selected++;
        this.invalidate();
        this.tui.requestRender();
      }
      return;
    }
    if (matchesKey(data, Key.enter)) {
      const proc = procs[this.selected];
      if (proc) {
        this.viewingProcess = proc;
        this.mode = "output";
        this.scrollOffset = Math.max(0, proc.output.length - 20);
        this.invalidate();
        this.tui.requestRender();
      }
      return;
    }
    // e = view events
    if (data === "e") {
      const proc = procs[this.selected];
      if (proc) {
        this.viewingProcess = proc;
        this.mode = "events";
        this.scrollOffset = Math.max(0, proc.events.length - 15);
        this.invalidate();
        this.tui.requestRender();
      }
      return;
    }
    // r = restart
    if (data === "r") {
      const proc = procs[this.selected];
      if (proc) {
        restartProcess(proc.id).then(() => {
          this.invalidate();
          this.tui.requestRender();
        });
      }
      return;
    }
    // x or d = kill selected (SIGTERM, escalate to SIGKILL after 300ms)
    if (data === "x" || data === "d") {
      const proc = procs[this.selected];
      if (proc && proc.alive) {
        killProcess(proc.id, "SIGTERM");
        setTimeout(() => {
          if (proc.alive) killProcess(proc.id, "SIGKILL");
          this.invalidate();
          this.tui.requestRender();
        }, 300);
      }
      return;
    }
    // X or D = kill all
    if (data === "X" || data === "D") {
      cleanupAll();
      this.selected = 0;
      this.invalidate();
      this.tui.requestRender();
      return;
    }
  }
  // Output screen: scroll, jump top/end, switch to events, or go back.
  private handleOutputInput(data: string): void {
    if (matchesKey(data, Key.escape) || matchesKey(data, "q")) {
      this.mode = "list";
      this.viewingProcess = null;
      this.scrollOffset = 0;
      this.invalidate();
      this.tui.requestRender();
      return;
    }
    // Tab to switch to events view
    if (matchesKey(data, Key.tab)) {
      this.mode = "events";
      if (this.viewingProcess) {
        this.scrollOffset = Math.max(0, this.viewingProcess.events.length - 15);
      }
      this.invalidate();
      this.tui.requestRender();
      return;
    }
    if (matchesKey(data, Key.down) || matchesKey(data, "j")) {
      if (this.viewingProcess) {
        const total = this.viewingProcess.output.length;
        this.scrollOffset = Math.min(this.scrollOffset + 5, Math.max(0, total - 20));
      }
      this.invalidate();
      this.tui.requestRender();
      return;
    }
    if (matchesKey(data, Key.up) || matchesKey(data, "k")) {
      this.scrollOffset = Math.max(0, this.scrollOffset - 5);
      this.invalidate();
      this.tui.requestRender();
      return;
    }
    if (data === "G") {
      if (this.viewingProcess) {
        const total = this.viewingProcess.output.length;
        this.scrollOffset = Math.max(0, total - 20);
      }
      this.invalidate();
      this.tui.requestRender();
      return;
    }
    if (data === "g") {
      this.scrollOffset = 0;
      this.invalidate();
      this.tui.requestRender();
      return;
    }
  }
  // Events screen: scroll, switch to output, or go back.
  private handleEventsInput(data: string): void {
    if (matchesKey(data, Key.escape) || matchesKey(data, "q")) {
      this.mode = "list";
      this.viewingProcess = null;
      this.scrollOffset = 0;
      this.invalidate();
      this.tui.requestRender();
      return;
    }
    // Tab to switch back to output view
    if (matchesKey(data, Key.tab)) {
      this.mode = "output";
      if (this.viewingProcess) {
        this.scrollOffset = Math.max(0, this.viewingProcess.output.length - 20);
      }
      this.invalidate();
      this.tui.requestRender();
      return;
    }
    if (matchesKey(data, Key.down) || matchesKey(data, "j")) {
      if (this.viewingProcess) {
        this.scrollOffset = Math.min(this.scrollOffset + 3, Math.max(0, this.viewingProcess.events.length - 10));
      }
      this.invalidate();
      this.tui.requestRender();
      return;
    }
    if (matchesKey(data, Key.up) || matchesKey(data, "k")) {
      this.scrollOffset = Math.max(0, this.scrollOffset - 3);
      this.invalidate();
      this.tui.requestRender();
      return;
    }
  }
  /** Render the active screen; returns cached lines when state is unchanged. */
  render(width: number): string[] {
    if (this.cachedLines && this.cachedWidth === width) {
      return this.cachedLines;
    }
    let lines: string[];
    if (this.mode === "events") {
      lines = this.renderEvents(width);
    } else if (this.mode === "output") {
      lines = this.renderOutput(width);
    } else {
      lines = this.renderList(width);
    }
    this.cachedWidth = width;
    this.cachedLines = lines;
    return lines;
  }
  // Wrap inner lines in a rounded border, truncating/padding to width.
  private box(inner: string[], width: number): string[] {
    const th = this.theme;
    const bdr = (s: string) => th.fg("borderMuted", s);
    const iw = width - 4;
    const lines: string[] = [];
    lines.push(bdr("╭" + "─".repeat(width - 2) + "╮"));
    for (const line of inner) {
      const truncated = truncateToWidth(line, iw);
      const pad = Math.max(0, iw - visibleWidth(truncated));
      lines.push(bdr("│") + " " + truncated + " ".repeat(pad) + " " + bdr("│"));
    }
    lines.push(bdr("╰" + "─".repeat(width - 2) + "╯"));
    return lines;
  }
  private renderList(width: number): string[] {
    const th = this.theme;
    const procs = this.getProcessList();
    const inner: string[] = [];
    if (procs.length === 0) {
      inner.push(th.fg("dim", "No background processes."));
      inner.push("");
      inner.push(th.fg("dim", "esc close"));
      return this.box(inner, width);
    }
    inner.push(th.fg("dim", "Background Processes"));
    inner.push("");
    for (let i = 0; i < procs.length; i++) {
      const p = procs[i];
      const sel = i === this.selected;
      const pointer = sel ? th.fg("accent", "▸ ") : "  ";
      const statusIcon = p.alive
        ? (p.status === "ready" ? th.fg("success", "●")
          : p.status === "error" ? th.fg("error", "●")
          : th.fg("warning", "●"))
        : th.fg("dim", "○");
      const uptime = th.fg("dim", formatUptime(Date.now() - p.startedAt));
      const name = sel ? th.fg("text", p.label) : th.fg("muted", p.label);
      const typeTag = th.fg("dim", `[${p.processType}]`);
      const portInfo = p.ports.length > 0 ? th.fg("dim", ` :${p.ports.join(",")}`) : "";
      const errBadge = p.recentErrors.length > 0 ? th.fg("error", `${p.recentErrors.length}`) : "";
      const groupTag = p.group ? th.fg("dim", ` {${p.group}}`) : "";
      const restartBadge = p.restartCount > 0 ? th.fg("warning", `${p.restartCount}`) : "";
      const status = p.alive ? "" : " " + th.fg("dim", `exit ${p.exitCode}`);
      inner.push(`${pointer}${statusIcon} ${name} ${typeTag} ${uptime}${portInfo}${errBadge}${groupTag}${restartBadge}${status}`);
    }
    inner.push("");
    inner.push(th.fg("dim", "↑↓ select · enter output · e events · r restart · x kill · esc close"));
    return this.box(inner, width);
  }
  private renderOutput(width: number): string[] {
    const th = this.theme;
    const p = this.viewingProcess;
    if (!p) return [""];
    const inner: string[] = [];
    const statusIcon = p.alive
      ? (p.status === "ready" ? th.fg("success", "●")
        : p.status === "error" ? th.fg("error", "●")
        : th.fg("warning", "●"))
      : th.fg("dim", "○");
    const name = th.fg("muted", p.label);
    const uptime = th.fg("dim", formatUptime(Date.now() - p.startedAt));
    const typeTag = th.fg("dim", `[${p.processType}]`);
    const portInfo = p.ports.length > 0 ? th.fg("dim", ` :${p.ports.join(",")}`) : "";
    const tabIndicator = th.fg("accent", "[Output]") + " " + th.fg("dim", "Events");
    inner.push(`${statusIcon} ${name} ${typeTag} ${uptime}${portInfo} ${tabIndicator}`);
    inner.push("");
    // Unified buffer is already chronologically interleaved
    const allOutput = p.output;
    const maxVisible = 18;
    const visible = allOutput.slice(this.scrollOffset, this.scrollOffset + maxVisible);
    if (allOutput.length === 0) {
      inner.push(th.fg("dim", "(no output)"));
    } else {
      for (const entry of visible) {
        const isError = ERROR_PATTERNS.some(pat => pat.test(entry.line));
        const isWarning = !isError && WARNING_PATTERNS.some(pat => pat.test(entry.line));
        const prefix = entry.stream === "stderr" ? th.fg("error", "⚠ ") : "";
        const color = isError ? "error" : isWarning ? "warning" : "dim";
        inner.push(prefix + th.fg(color, entry.line));
      }
      if (allOutput.length > maxVisible) {
        inner.push("");
        // NOTE(review): the range separator between the two numbers appears
        // to have been lost (likely an en-dash) — confirm against main.
        const pos = `${this.scrollOffset + 1}${Math.min(this.scrollOffset + maxVisible, allOutput.length)} of ${allOutput.length}`;
        inner.push(th.fg("dim", pos));
      }
    }
    inner.push("");
    inner.push(th.fg("dim", "↑↓ scroll · g/G top/end · tab events · q back"));
    return this.box(inner, width);
  }
  private renderEvents(width: number): string[] {
    const th = this.theme;
    const p = this.viewingProcess;
    if (!p) return [""];
    const inner: string[] = [];
    const statusIcon = p.alive
      ? (p.status === "ready" ? th.fg("success", "●")
        : p.status === "error" ? th.fg("error", "●")
        : th.fg("warning", "●"))
      : th.fg("dim", "○");
    const name = th.fg("muted", p.label);
    const uptime = th.fg("dim", formatUptime(Date.now() - p.startedAt));
    const tabIndicator = th.fg("dim", "Output") + " " + th.fg("accent", "[Events]");
    inner.push(`${statusIcon} ${name} ${uptime} ${tabIndicator}`);
    inner.push("");
    if (p.events.length === 0) {
      inner.push(th.fg("dim", "(no events)"));
    } else {
      const maxVisible = 15;
      const visible = p.events.slice(this.scrollOffset, this.scrollOffset + maxVisible);
      for (const ev of visible) {
        const time = th.fg("dim", formatTimeAgo(ev.timestamp));
        const typeColor = ev.type === "crashed" || ev.type === "error_detected" ? "error"
          : ev.type === "ready" || ev.type === "recovered" ? "success"
          : ev.type === "port_open" ? "accent"
          : "dim";
        const typeLabel = th.fg(typeColor, ev.type);
        inner.push(`${time} ${typeLabel}`);
        inner.push(`  ${th.fg("dim", ev.detail.slice(0, 80))}`);
      }
      if (p.events.length > maxVisible) {
        inner.push("");
        inner.push(th.fg("dim", `${this.scrollOffset + 1}${Math.min(this.scrollOffset + maxVisible, p.events.length)} of ${p.events.length} events`));
      }
    }
    inner.push("");
    inner.push(th.fg("dim", "↑↓ scroll · tab output · q back"));
    return this.box(inner, width);
  }
  /** Drop the render cache so the next render() recomputes lines. */
  invalidate(): void {
    this.cachedWidth = undefined;
    this.cachedLines = undefined;
  }
}

View file

@ -0,0 +1,404 @@
/**
* Process lifecycle management: start, stop, restart, signal, state tracking,
* process registry, and persistence.
*/
import { spawn, spawnSync } from "node:child_process";
import { randomUUID } from "node:crypto";
import { writeFileSync, readFileSync, existsSync, mkdirSync } from "node:fs";
import { join } from "node:path";
import { getShellConfig, sanitizeCommand } from "@gsd/pi-coding-agent";
import type {
BgProcess,
BgProcessInfo,
ProcessEvent,
ProcessManifest,
ProcessType,
StartOptions,
} from "./types.js";
import {
MAX_BUFFER_LINES,
MAX_EVENTS,
DEAD_PROCESS_TTL,
} from "./types.js";
import { restoreWindowsVTInput, formatUptime } from "./utilities.js";
import { analyzeLine } from "./output-formatter.js";
import { startPortProbing, transitionToReady } from "./readiness-detector.js";
// ── Process Registry ───────────────────────────────────────────────────────
/** Registry of all background processes for this session, keyed by short id. */
export const processes = new Map<string, BgProcess>();
/** Pending alerts to inject into the next agent context */
export let pendingAlerts: string[] = [];
/** Replace the pendingAlerts array (used by the extension entry point) */
export function setPendingAlerts(alerts: string[]): void {
  // Rebinds the module-level array; callers holding a reference to the old
  // array will not observe the swap.
  pendingAlerts = alerts;
}
/**
 * Append one captured line to the unified output buffer, evicting the oldest
 * lines past MAX_BUFFER_LINES and shifting the incremental-read cursor so
 * consumers neither re-read nor skip lines after eviction.
 */
export function addOutputLine(bg: BgProcess, stream: "stdout" | "stderr", line: string): void {
  bg.output.push({ stream, line, ts: Date.now() });
  const overflow = bg.output.length - MAX_BUFFER_LINES;
  if (overflow > 0) {
    bg.output.splice(0, overflow);
    // Keep incremental delivery aligned with the shifted buffer.
    bg.lastReadIndex = Math.max(0, bg.lastReadIndex - overflow);
  }
}
/** Record a lifecycle event stamped with now, keeping only the newest MAX_EVENTS. */
export function addEvent(bg: BgProcess, event: Omit<ProcessEvent, "timestamp">): void {
  bg.events.push({ ...event, timestamp: Date.now() });
  const overflow = bg.events.length - MAX_EVENTS;
  if (overflow > 0) {
    bg.events.splice(0, overflow);
  }
}
/** Queue an alert line, tagged with the process id and label, for the agent. */
export function pushAlert(bg: BgProcess, message: string): void {
  const tag = `[bg:${bg.id} ${bg.label}]`;
  pendingAlerts.push(`${tag} ${message}`);
}
/**
 * Produce a serialisable snapshot of a process for listings and tool output.
 * Counts stdout/stderr lines in a single pass over the unified buffer.
 */
export function getInfo(p: BgProcess): BgProcessInfo {
  let stdoutLines = 0;
  let stderrLines = 0;
  for (const entry of p.output) {
    if (entry.stream === "stdout") stdoutLines++;
    else stderrLines++;
  }
  return {
    id: p.id,
    label: p.label,
    command: p.command,
    cwd: p.cwd,
    startedAt: p.startedAt,
    alive: p.alive,
    exitCode: p.exitCode,
    signal: p.signal,
    outputLines: p.output.length,
    stdoutLines,
    stderrLines,
    status: p.status,
    processType: p.processType,
    ports: p.ports,
    urls: p.urls,
    group: p.group,
    restartCount: p.restartCount,
    uptime: formatUptime(Date.now() - p.startedAt),
    recentErrorCount: p.recentErrors.length,
    recentWarningCount: p.recentWarnings.length,
    eventCount: p.events.length,
  };
}
// ── Process Type Detection ─────────────────────────────────────────────────
/**
 * Best-effort classification of a shell command into a process type, used to
 * pick readiness heuristics and display tags.
 *
 * Order matters: server checks run before build checks so e.g. "npm run dev"
 * classifies as a server rather than falling through.
 */
export function detectProcessType(command: string): ProcessType {
  const cmd = command.toLowerCase();
  // Server: a serve-ish verb combined with a known tool, or a known server
  // invocation, or a static file server.
  const serverVerb = /\b(serve|server|dev|start)\b/;
  const serverTool = /\b(npm|yarn|pnpm|bun|node|next|vite|nuxt|astro|remix|gatsby|uvicorn|flask|django|rails|cargo)\b/;
  if (serverVerb.test(cmd) && serverTool.test(cmd)) return "server";
  if (/\b(uvicorn|gunicorn|flask\s+run|manage\.py\s+runserver|rails\s+s)\b/.test(cmd)) return "server";
  if (/\b(http-server|live-server|serve)\b/.test(cmd)) return "server";
  // Build tools; a watch flag turns a build into a watcher.
  if (/\b(build|compile|make|tsc|webpack|rollup|esbuild|swc)\b/.test(cmd)) {
    return /\b(watch|--watch|-w)\b/.test(cmd) ? "watcher" : "build";
  }
  if (/\b(test|jest|vitest|mocha|pytest|cargo\s+test|go\s+test|rspec)\b/.test(cmd)) return "test";
  if (/\b(watch|nodemon|chokidar|fswatch|inotifywait)\b/.test(cmd)) return "watcher";
  return "generic";
}
// ── Process Start ──────────────────────────────────────────────────────────
/**
 * Spawn a command as a managed background process and register it.
 *
 * The command runs through the user's shell (detached on POSIX so the whole
 * process group can later be signalled via a negative PID). stdout/stderr are
 * captured line by line into a unified buffer and fed through analyzeLine for
 * error/port/readiness detection.
 *
 * @param opts Start options; `opts.type` overrides command-based detection.
 * @returns The registered process record; its status starts as "starting".
 */
export function startProcess(opts: StartOptions): BgProcess {
  // Short id for interactive reference; 8 hex-ish chars of a UUID.
  const id = randomUUID().slice(0, 8);
  const processType = opts.type || detectProcessType(opts.command);
  // Caller env overlays the inherited environment.
  const env = { ...process.env, ...(opts.env || {}) };
  const { shell, args: shellArgs } = getShellConfig();
  // Shell sessions default to the user's shell if no command specified
  const command = processType === "shell" && !opts.command ? shell : opts.command;
  const proc = spawn(shell, [...shellArgs, sanitizeCommand(command)], {
    cwd: opts.cwd,
    stdio: ["pipe", "pipe", "pipe"],
    env,
    // POSIX only: own process group so killProcess can signal -pid.
    detached: process.platform !== "win32",
  });
  const bg: BgProcess = {
    id,
    label: opts.label || command.slice(0, 60),
    command,
    cwd: opts.cwd,
    startedAt: Date.now(),
    proc,
    output: [],
    exitCode: null,
    signal: null,
    alive: true,
    lastReadIndex: 0,
    processType,
    status: "starting",
    ports: [],
    urls: [],
    recentErrors: [],
    recentWarnings: [],
    events: [],
    readyPattern: opts.readyPattern || null,
    readyPort: opts.readyPort || null,
    wasReady: false,
    group: opts.group || null,
    lastErrorCount: 0,
    lastWarningCount: 0,
    commandHistory: [],
    lineDedup: new Map(),
    totalRawLines: 0,
    envKeys: Object.keys(opts.env || {}),
    restartCount: 0,
    // Everything restartProcess needs to recreate this process.
    // NOTE(review): opts.env is not recorded here, so restarts do not
    // reapply custom environment variables — confirm intended.
    startConfig: {
      command,
      cwd: opts.cwd,
      label: opts.label || command.slice(0, 60),
      processType,
      readyPattern: opts.readyPattern || null,
      readyPort: opts.readyPort || null,
      group: opts.group || null,
    },
  };
  addEvent(bg, { type: "started", detail: `Process started: ${command.slice(0, 100)}` });
  // NOTE(review): each chunk is split on "\n" independently; a line that
  // straddles a chunk boundary is recorded as two lines — confirm acceptable.
  proc.stdout?.on("data", (chunk: Buffer) => {
    const lines = chunk.toString().split("\n");
    for (const line of lines) {
      if (line.length > 0) {
        addOutputLine(bg, "stdout", line);
        analyzeLine(bg, line, "stdout");
      }
    }
  });
  proc.stderr?.on("data", (chunk: Buffer) => {
    const lines = chunk.toString().split("\n");
    for (const line of lines) {
      if (line.length > 0) {
        addOutputLine(bg, "stderr", line);
        analyzeLine(bg, line, "stderr");
      }
    }
  });
  proc.on("exit", (code, sig) => {
    restoreWindowsVTInput();
    bg.alive = false;
    bg.exitCode = code;
    bg.signal = sig ?? null;
    if (code === 0) {
      bg.status = "exited";
      addEvent(bg, { type: "exited", detail: `Exited cleanly (code 0)` });
    } else {
      // Any non-zero (or signalled) exit counts as a crash; include recent
      // errors in the event and alert for context.
      bg.status = "crashed";
      const lastErrors = bg.recentErrors.slice(-3).join("; ");
      const detail = `Crashed with code ${code}${sig ? ` (signal ${sig})` : ""}${lastErrors ? `${lastErrors}` : ""}`;
      addEvent(bg, {
        type: "crashed",
        detail,
        data: { exitCode: code, signal: sig, lastErrors: bg.recentErrors.slice(-5) },
      });
      pushAlert(bg, `CRASHED (code ${code})${lastErrors ? `: ${lastErrors.slice(0, 120)}` : ""}`);
    }
  });
  // Spawn failure (e.g. shell not found) — surfaced as a crash.
  proc.on("error", (err) => {
    bg.alive = false;
    bg.status = "crashed";
    addOutputLine(bg, "stderr", `[spawn error] ${err.message}`);
    addEvent(bg, { type: "crashed", detail: `Spawn error: ${err.message}` });
    pushAlert(bg, `spawn error: ${err.message}`);
  });
  // Port probing for server-type processes
  if (bg.readyPort) {
    startPortProbing(bg, bg.readyPort, opts.readyTimeout);
  }
  // Shell sessions are ready immediately after spawn
  if (bg.processType === "shell") {
    setTimeout(() => {
      if (bg.alive && bg.status === "starting") {
        transitionToReady(bg, "Shell session initialized");
      }
    }, 200);
  }
  processes.set(id, bg);
  return bg;
}
// ── Process Kill ───────────────────────────────────────────────────────────
/**
 * Send a signal to a process — and its whole tree/group where the platform
 * allows — returning true when the signal was dispatched (or the process was
 * already dead) and false when the id is unknown or signalling failed.
 */
export function killProcess(id: string, sig: NodeJS.Signals = "SIGTERM"): boolean {
  const bg = processes.get(id);
  if (!bg) return false;
  if (!bg.alive) return true;
  const pid = bg.proc.pid;
  try {
    if (process.platform === "win32") {
      // Windows has no Unix process groups (process.kill(-pid) does not
      // work), so taskkill /F /T force-kills the entire tree; fall back to a
      // direct kill when taskkill fails.
      if (pid) {
        const result = spawnSync("taskkill", ["/F", "/T", "/PID", String(pid)], {
          timeout: 5000,
          encoding: "utf-8",
        });
        const succeeded = result.status === 0 || result.status === 128;
        if (!succeeded) bg.proc.kill(sig);
      } else {
        bg.proc.kill(sig);
      }
    } else if (pid) {
      // Unix/macOS: signal the whole process group via the negative PID;
      // fall back to the single process if that fails.
      try {
        process.kill(-pid, sig);
      } catch {
        bg.proc.kill(sig);
      }
    } else {
      bg.proc.kill(sig);
    }
    return true;
  } catch {
    return false;
  }
}
// ── Process Restart ────────────────────────────────────────────────────────
/**
 * Kill a process (SIGTERM, escalating to SIGKILL after a grace period) and
 * start a replacement from its recorded start configuration, carrying the
 * restart count forward. Resolves to null when the id is unknown.
 *
 * NOTE(review): startConfig does not record custom env vars, so they are not
 * reapplied on restart — confirm whether that is intended.
 */
export async function restartProcess(id: string): Promise<BgProcess | null> {
  const old = processes.get(id);
  if (!old) return null;
  const { startConfig } = old;
  const nextRestartCount = old.restartCount + 1;
  if (old.alive) {
    killProcess(id, "SIGTERM");
    await new Promise(r => setTimeout(r, 300));
    if (old.alive) {
      // Still running after the grace period — force it down.
      killProcess(id, "SIGKILL");
      await new Promise(r => setTimeout(r, 200));
    }
  }
  processes.delete(id);
  const replacement = startProcess({
    command: startConfig.command,
    cwd: startConfig.cwd,
    label: startConfig.label,
    type: startConfig.processType,
    readyPattern: startConfig.readyPattern || undefined,
    readyPort: startConfig.readyPort || undefined,
    group: startConfig.group || undefined,
  });
  replacement.restartCount = nextRestartCount;
  return replacement;
}
// ── Group Operations ───────────────────────────────────────────────────────
/** All registered processes belonging to the given group name. */
export function getGroupProcesses(group: string): BgProcess[] {
  const members: BgProcess[] = [];
  for (const p of processes.values()) {
    if (p.group === group) members.push(p);
  }
  return members;
}
/**
 * Summarise a group's health. "Healthy" means the group is non-empty and
 * every member is alive and either ready or still starting.
 */
export function getGroupStatus(group: string): {
  group: string;
  healthy: boolean;
  processes: { id: string; label: string; status: import("./types.js").ProcessStatus; alive: boolean }[];
} {
  const members = getGroupProcesses(group);
  const allOk = members.every(p => p.alive && (p.status === "ready" || p.status === "starting"));
  return {
    group,
    healthy: members.length > 0 && allOk,
    processes: members.map(({ id, label, status, alive }) => ({ id, label, status, alive })),
  };
}
// ── Cleanup ────────────────────────────────────────────────────────────────
/**
 * Drop dead processes whose retention window has expired; shell sessions are
 * retained six times longer than other process types.
 *
 * NOTE(review): the TTL is measured from startedAt rather than from exit
 * time, so a long-lived process is eligible for pruning the moment it dies —
 * confirm intended.
 */
export function pruneDeadProcesses(): void {
  const now = Date.now();
  for (const [id, bg] of processes) {
    if (bg.alive) continue;
    const ttl = bg.processType === "shell" ? DEAD_PROCESS_TTL * 6 : DEAD_PROCESS_TTL;
    if (now - bg.startedAt > ttl) {
      processes.delete(id);
    }
  }
}
/** Force-kill every live process, then empty the registry entirely. */
export function cleanupAll(): void {
  for (const [id, bg] of processes.entries()) {
    if (bg.alive) {
      killProcess(id, "SIGKILL");
    }
  }
  processes.clear();
}
// ── Persistence ────────────────────────────────────────────────────────────
/**
 * Path of the persistence manifest at `<cwd>/.bg-shell/manifest.json`,
 * creating the directory on demand.
 */
export function getManifestPath(cwd: string): string {
  const dir = join(cwd, ".bg-shell");
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true });
  }
  return join(dir, "manifest.json");
}
/**
 * Best-effort write of all live processes to the manifest file so a later
 * session can rediscover them. Failures are deliberately swallowed.
 */
export function persistManifest(cwd: string): void {
  try {
    const entries: ProcessManifest[] = [];
    for (const p of processes.values()) {
      if (!p.alive) continue;
      entries.push({
        id: p.id,
        label: p.label,
        command: p.command,
        cwd: p.cwd,
        startedAt: p.startedAt,
        processType: p.processType,
        group: p.group,
        readyPattern: p.readyPattern,
        readyPort: p.readyPort,
        pid: p.proc.pid,
      });
    }
    writeFileSync(getManifestPath(cwd), JSON.stringify(entries, null, 2));
  } catch { /* best effort */ }
}
export function loadManifest(cwd: string): ProcessManifest[] {
try {
const path = getManifestPath(cwd);
if (existsSync(path)) {
return JSON.parse(readFileSync(path, "utf-8"));
}
} catch { /* best effort */ }
return [];
}

View file

@ -0,0 +1,126 @@
/**
* Readiness detection: port probing, pattern matching, wait-for-ready.
*/
import { createConnection } from "node:net";
import type { BgProcess } from "./types.js";
import {
PORT_PROBE_TIMEOUT,
READY_POLL_INTERVAL,
DEFAULT_READY_TIMEOUT,
} from "./types.js";
import { addEvent, pushAlert } from "./process-manager.js";
// ── Readiness Transition ───────────────────────────────────────────────────
/** Mark a process ready, record that it ever reached ready, and log the event. */
export function transitionToReady(bg: BgProcess, detail: string): void {
  bg.wasReady = true;
  bg.status = "ready";
  addEvent(bg, { type: "ready", detail });
}
// ── Port Probing ───────────────────────────────────────────────────────────
/**
 * Attempt a TCP connection to host:port. Resolves true iff the connection
 * succeeds within PORT_PROBE_TIMEOUT; never rejects.
 */
export function probePort(port: number, host: string = "127.0.0.1"): Promise<boolean> {
  return new Promise((resolve) => {
    const socket = createConnection({ port, host, timeout: PORT_PROBE_TIMEOUT }, () => {
      // Connected — something is accepting on the port.
      socket.destroy();
      resolve(true);
    });
    const fail = () => {
      socket.destroy();
      resolve(false);
    };
    socket.on("error", fail);
    socket.on("timeout", fail);
  });
}
// ── Port Probing Loop ──────────────────────────────────────────────────────
/**
 * Poll a TCP port until it opens, the process dies, or the timeout elapses.
 *
 * On success the process transitions to "ready"; on timeout it is moved to
 * "error" so it cannot sit in "starting" forever (fixes #428). The outer
 * timeout timer is never cancelled early, but its callback re-checks status,
 * so a process that already became ready is left untouched.
 *
 * @param bg            Process whose readiness is tied to the port.
 * @param port          TCP port probed on localhost.
 * @param customTimeout Optional overall timeout in ms (defaults to
 *                      DEFAULT_READY_TIMEOUT).
 */
export function startPortProbing(bg: BgProcess, port: number, customTimeout?: number): void {
  const timeout = customTimeout || DEFAULT_READY_TIMEOUT;
  const interval = setInterval(async () => {
    // Process died while waiting — record why, with recent stderr context.
    // NOTE(review): the separator before the stderr snippet appears missing
    // (likely a dash lost in the merge) — confirm against main.
    if (!bg.alive) {
      clearInterval(interval);
      const stderrLines = bg.output.filter(l => l.stream === "stderr").slice(-10).map(l => l.line);
      const detail = `Process exited (code ${bg.exitCode}) before port ${port} opened${stderrLines.length > 0 ? `${stderrLines.join("; ").slice(0, 200)}` : ""}`;
      addEvent(bg, { type: "port_timeout", detail, data: { port, exitCode: bg.exitCode } });
      return;
    }
    // Some other detector (pattern match, etc.) already settled the status.
    if (bg.status !== "starting") {
      clearInterval(interval);
      return;
    }
    const open = await probePort(port);
    if (open) {
      clearInterval(interval);
      if (!bg.ports.includes(port)) bg.ports.push(port);
      transitionToReady(bg, `Port ${port} is open`);
      addEvent(bg, { type: "port_open", detail: `Port ${port} is open`, data: { port } });
    }
  }, READY_POLL_INTERVAL);
  // Stop probing after timeout — transition to error state so the process
  // doesn't stay in "starting" forever (fixes #428)
  setTimeout(() => {
    clearInterval(interval);
    if (bg.alive && bg.status === "starting") {
      const stderrLines = bg.output.filter(l => l.stream === "stderr").slice(-10).map(l => l.line);
      const detail = `Port ${port} not open after ${timeout}ms${stderrLines.length > 0 ? `${stderrLines.join("; ").slice(0, 200)}` : ""}`;
      bg.status = "error";
      addEvent(bg, { type: "port_timeout", detail, data: { port, timeout } });
      pushAlert(bg, `Port ${port} readiness timeout after ${timeout / 1000}s`);
    }
  }, timeout);
}
// ── Wait for Ready ─────────────────────────────────────────────────────────
/**
 * Poll until the process reaches a terminal readiness outcome or the timeout
 * elapses. Resolves (never rejects) with a ready flag plus human-readable
 * detail; failure details include recent stderr for context.
 *
 * @param bg      Process being awaited.
 * @param timeout Maximum wait in milliseconds.
 * @param signal  Optional abort signal that cancels the wait early.
 */
export async function waitForReady(bg: BgProcess, timeout: number, signal?: AbortSignal): Promise<{ ready: boolean; detail: string }> {
  // Last few stderr lines formatted for appending to a failure message.
  const stderrContext = (): string => {
    const recent = bg.output.filter(l => l.stream === "stderr").slice(-5).map(l => l.line);
    return recent.length > 0 ? `\nstderr:\n${recent.join("\n").slice(0, 500)}` : "";
  };
  const deadline = Date.now() + timeout;
  while (Date.now() < deadline) {
    if (signal?.aborted) {
      return { ready: false, detail: "Cancelled" };
    }
    if (!bg.alive) {
      return {
        ready: false,
        detail: `Process exited before becoming ready (code ${bg.exitCode})${bg.recentErrors.length > 0 ? `${bg.recentErrors.slice(-1)[0]}` : ""}${stderrContext()}`,
      };
    }
    if (bg.status === "error") {
      return {
        ready: false,
        detail: `Process entered error state${bg.readyPort ? ` (port ${bg.readyPort} never opened)` : ""}${stderrContext()}`,
      };
    }
    if (bg.status === "ready") {
      const readyEvent = bg.events.find(e => e.type === "ready");
      return { ready: true, detail: readyEvent?.detail || "Process is ready" };
    }
    await new Promise(r => setTimeout(r, READY_POLL_INTERVAL));
  }
  // Timed out — a direct port probe is the last resort, since the output
  // patterns may simply have missed the ready signal.
  if (bg.readyPort) {
    const open = await probePort(bg.readyPort);
    if (open) {
      transitionToReady(bg, `Port ${bg.readyPort} is open (detected at timeout)`);
      return { ready: true, detail: `Port ${bg.readyPort} is open` };
    }
  }
  return { ready: false, detail: `Timed out after ${timeout}ms waiting for ready signal${stderrContext()}` };
}

View file

@ -0,0 +1,251 @@
/**
 * Shared types, constants, and pattern databases for the bg-shell extension.
 */
// ── Types ──────────────────────────────────────────────────────────────────
/**
 * Lifecycle status of a managed process. "starting" until readiness is
 * observed (pattern/port), then "ready"; "error"/"exited"/"crashed" cover
 * failure and termination states.
 */
export type ProcessStatus =
  | "starting"
  | "ready"
  | "error"
  | "exited"
  | "crashed";
/** Coarse classification of the managed process's purpose. */
export type ProcessType = "server" | "build" | "test" | "watcher" | "generic" | "shell";
/** One entry in a process's lifecycle event log. */
export interface ProcessEvent {
  // Discriminant naming the lifecycle transition or observation.
  type:
    | "started"
    | "ready"
    | "error_detected"
    | "recovered"
    | "exited"
    | "crashed"
    | "output"
    | "port_open"
    | "pattern_match"
    | "port_timeout";
  // When the event was recorded (compared against Date.now elsewhere).
  timestamp: number;
  // Human-readable description of the event.
  detail: string;
  // Optional structured payload (e.g. { port, timeout }).
  data?: Record<string, unknown>;
}
/** Condensed summary of a process's state and recent output. */
export interface OutputDigest {
  status: ProcessStatus;
  // Pre-formatted uptime string (e.g. "5m 3s").
  uptime: string;
  errors: string[];
  warnings: string[];
  urls: string[];
  ports: number[];
  lastActivity: string;
  // Number of lines currently held in the output buffer.
  outputLines: number;
  changeSummary: string;
}
/** A single captured line of child-process output, tagged with its stream. */
export interface OutputLine {
  stream: "stdout" | "stderr";
  line: string;
  // Capture timestamp.
  ts: number;
}
/** Full runtime record for one managed background process. */
export interface BgProcess {
  /** Unique process identifier */
  id: string;
  /** Human-readable label */
  label: string;
  /** Shell command the process was started with */
  command: string;
  /** Working directory the process runs in */
  cwd: string;
  /** Start timestamp (used for uptime display) */
  startedAt: number;
  /** Underlying Node child-process handle */
  proc: import("node:child_process").ChildProcess;
  /** Unified chronologically-interleaved output buffer */
  output: OutputLine[];
  /** Exit code after termination, null while running */
  exitCode: number | null;
  /** Terminating signal name, if any */
  signal: string | null;
  /** Whether the process is still running */
  alive: boolean;
  /** Tracks how many lines in the unified output buffer the LLM has already seen */
  lastReadIndex: number;
  /** Process classification */
  processType: ProcessType;
  /** Current lifecycle status */
  status: ProcessStatus;
  /** Detected ports */
  ports: number[];
  /** Detected URLs */
  urls: string[];
  /** Accumulated errors since last read */
  recentErrors: string[];
  /** Accumulated warnings since last read */
  recentWarnings: string[];
  /** Lifecycle events log */
  events: ProcessEvent[];
  /** Ready pattern (regex string) */
  readyPattern: string | null;
  /** Ready port to probe */
  readyPort: number | null;
  /** Whether readiness was ever achieved */
  wasReady: boolean;
  /** Group membership */
  group: string | null;
  /** Last error count snapshot for diff detection */
  lastErrorCount: number;
  /** Last warning count snapshot for diff detection */
  lastWarningCount: number;
  /** Command history for shell-type sessions */
  commandHistory: string[];
  /** Dedup tracker: hash → count of repeated lines */
  lineDedup: Map<string, number>;
  /** Total raw lines (before dedup) for token savings calc */
  totalRawLines: number;
  /** Env snapshot (keys only, no values for security) */
  envKeys: string[];
  /** Restart count */
  restartCount: number;
  /** Original start config for restart */
  startConfig: { command: string; cwd: string; label: string; processType: ProcessType; readyPattern: string | null; readyPort: number | null; group: string | null };
}
/** Serializable status snapshot of a process, as reported to callers. */
export interface BgProcessInfo {
  id: string;
  label: string;
  command: string;
  cwd: string;
  startedAt: number;
  alive: boolean;
  exitCode: number | null;
  signal: string | null;
  // Line counts: total, and per-stream breakdown.
  outputLines: number;
  stdoutLines: number;
  stderrLines: number;
  status: ProcessStatus;
  processType: ProcessType;
  ports: number[];
  urls: string[];
  group: string | null;
  restartCount: number;
  // Pre-formatted uptime string.
  uptime: string;
  recentErrorCount: number;
  recentWarningCount: number;
  eventCount: number;
}
/** Options accepted when starting a new background process. */
export interface StartOptions {
  command: string;
  cwd: string;
  // Defaults presumably derived from the command when omitted — see start logic.
  label?: string;
  type?: ProcessType;
  // Regex source matched against output to detect readiness.
  readyPattern?: string;
  // TCP port probed to detect readiness.
  readyPort?: number;
  // Readiness timeout in ms (see DEFAULT_READY_TIMEOUT).
  readyTimeout?: number;
  group?: string;
  // Extra environment variables merged into the child's env.
  env?: Record<string, string>;
}
/** Options for reading buffered output from a process. */
export interface GetOutputOptions {
  // Which stream(s) to include.
  stream: "stdout" | "stderr" | "both";
  // Return only the last N lines.
  tail?: number;
  // Substring/regex filter — semantics defined by the reader implementation.
  filter?: string;
  // When true, return only lines not yet seen (see BgProcess.lastReadIndex).
  incremental?: boolean;
}
/** Persisted manifest entry describing a process (enough to identify/restart it). */
export interface ProcessManifest {
  id: string;
  label: string;
  command: string;
  cwd: string;
  startedAt: number;
  processType: ProcessType;
  group: string | null;
  readyPattern: string | null;
  readyPort: number | null;
  // OS pid; undefined if the child never spawned.
  pid: number | undefined;
}
// ── Constants ──────────────────────────────────────────────────────────────
/** Cap on buffered output lines per process — presumably oldest are dropped beyond this. */
export const MAX_BUFFER_LINES = 5000;
/** Cap on retained lifecycle events per process. */
export const MAX_EVENTS = 200;
/** How long records for dead processes are retained, in ms (10 minutes). */
export const DEAD_PROCESS_TTL = 10 * 60 * 1000;
/** Timeout for a single TCP port probe, in ms. */
export const PORT_PROBE_TIMEOUT = 500;
/** Polling interval used by readiness checks (waitForReady and port probing), in ms. */
export const READY_POLL_INTERVAL = 250;
/** Default readiness timeout, in ms (30 s). */
export const DEFAULT_READY_TIMEOUT = 30000;
// ── Pattern Databases ──────────────────────────────────────────────────────
/** Patterns that indicate a process is ready/listening */
export const READINESS_PATTERNS: RegExp[] = [
  // Node/JS servers
  /listening\s+on\s+(?:port\s+)?(\d+)/i,
  /server\s+(?:is\s+)?(?:running|started|listening)\s+(?:at|on)\s+/i,
  /ready\s+(?:in|on|at)\s+/i,
  /started\s+(?:server\s+)?on\s+/i,
  // Next.js / Vite / etc
  /Local:\s*https?:\/\//i,
  /➜\s+Local:\s*/i,
  /compiled\s+(?:successfully|client\s+and\s+server)/i,
  // Python
  /running\s+on\s+https?:\/\//i,
  /Uvicorn\s+running/i,
  /Development\s+server\s+is\s+running/i,
  // Generic
  /press\s+ctrl[\-+]c\s+to\s+(?:quit|stop)/i,
  /watching\s+for\s+(?:file\s+)?changes/i,
  /build\s+(?:completed|succeeded|finished)/i,
];
/** Patterns that indicate errors */
export const ERROR_PATTERNS: RegExp[] = [
  // "error" followed by whitespace or punctuation — avoids words like "errors" mid-sentence.
  /\berror\b[\s:[\](]/i,
  /\bERROR\b/,
  /\bfailed\b/i,
  /\bFAILED\b/,
  /\bfatal\b/i,
  /\bFATAL\b/,
  /\bexception\b/i,
  /\bpanic\b/i,
  /\bsegmentation\s+fault\b/i,
  /\bsyntax\s*error\b/i,
  /\btype\s*error\b/i,
  /\breference\s*error\b/i,
  /Cannot\s+find\s+module/i,
  /Module\s+not\s+found/i,
  /ENOENT/,
  /EACCES/,
  /EADDRINUSE/,
  /TS\d{4,5}:/, // TypeScript errors
  /E\d{4,5}:/, // Rust errors
  /\[ERROR\]/,
  /✖|✗|❌/, // Common error symbols
];
/** Patterns that indicate warnings */
export const WARNING_PATTERNS: RegExp[] = [
  /\bwarning\b[\s:[\](]/i,
  /\bWARN(?:ING)?\b/,
  /\bdeprecated\b/i,
  /\bDEPRECATED\b/,
  // Warning sign with optional emoji variation selector (U+FE0F).
  /⚠️?/,
  /\[WARN\]/,
];
/** Patterns to extract URLs */
// NOTE(review): carries the `g` flag, so `.test()`/`.exec()` are stateful via
// lastIndex — presumably used with String.prototype.match/matchAll; verify at call sites.
export const URL_PATTERN = /https?:\/\/[^\s"'<>)\]]+/gi;
/** Patterns to extract port numbers from "listening" messages */
// NOTE(review): also `g`-flagged — same lastIndex caveat as URL_PATTERN.
export const PORT_PATTERN = /(?:port|listening\s+on|:)\s*(\d{2,5})\b/gi;
/** Patterns indicating test results */
export const TEST_RESULT_PATTERNS: RegExp[] = [
  /(\d+)\s+(?:tests?\s+)?passed/i,
  /(\d+)\s+(?:tests?\s+)?failed/i,
  /Tests?:\s+(\d+)\s+passed/i,
  /(\d+)\s+passing/i,
  /(\d+)\s+failing/i,
  // Case-sensitive, no word boundary — also matches inside "PASSED"/"FAILED".
  /PASS|FAIL/,
];
/** Patterns indicating build completion */
export const BUILD_COMPLETE_PATTERNS: RegExp[] = [
  /build\s+(?:completed|succeeded|finished|done)/i,
  /compiled\s+(?:successfully|with\s+\d+\s+(?:error|warning))/i,
  /✓\s+Built/i,
  /webpack\s+\d+\.\d+/i,
  /bundle\s+(?:is\s+)?ready/i,
];

View file

@ -0,0 +1,55 @@
/**
* Utility functions for the bg-shell extension.
*/
import { createRequire } from "node:module";
// ── Windows VT Input Restoration ────────────────────────────────────────────
// Child processes (esp. Git Bash / MSYS2) can strip the ENABLE_VIRTUAL_TERMINAL_INPUT
// flag from the shared stdin console handle. Re-enable it after each child exits.
//
// Lazily-initialized kernel32 bindings plus the cached stdin console handle.
// Typed with call signatures instead of the banned bare `Function` type so
// call sites are at least arity-visible to the checker.
let _vtHandles: {
  GetConsoleMode: (handle: unknown, mode: Uint32Array) => unknown;
  SetConsoleMode: (handle: unknown, mode: number) => unknown;
  handle: unknown;
} | null = null;
/**
 * Re-enable ENABLE_VIRTUAL_TERMINAL_INPUT on the shared stdin console handle.
 *
 * No-op on non-Windows platforms; silently does nothing if koffi (the FFI
 * library) is unavailable. FFI bindings are created once and cached.
 */
export function restoreWindowsVTInput(): void {
  if (process.platform !== "win32") return;
  try {
    if (!_vtHandles) {
      const cjsRequire = createRequire(import.meta.url);
      const koffi = cjsRequire("koffi");
      const k32 = koffi.load("kernel32.dll");
      const GetStdHandle = k32.func("void* __stdcall GetStdHandle(int)");
      const GetConsoleMode = k32.func("bool __stdcall GetConsoleMode(void*, _Out_ uint32_t*)");
      const SetConsoleMode = k32.func("bool __stdcall SetConsoleMode(void*, uint32_t)");
      const handle = GetStdHandle(-10); // -10 = STD_INPUT_HANDLE
      _vtHandles = { GetConsoleMode, SetConsoleMode, handle };
    }
    const ENABLE_VIRTUAL_TERMINAL_INPUT = 0x0200;
    const mode = new Uint32Array(1);
    _vtHandles.GetConsoleMode(_vtHandles.handle, mode);
    // Only write the mode back when the flag is actually missing.
    if (!(mode[0] & ENABLE_VIRTUAL_TERMINAL_INPUT)) {
      _vtHandles.SetConsoleMode(_vtHandles.handle, mode[0] | ENABLE_VIRTUAL_TERMINAL_INPUT);
    }
  } catch { /* koffi not available on non-Windows */ }
}
// ── Time Formatting ────────────────────────────────────────────────────────
/**
 * Render a millisecond duration as a coarse human string:
 * "<N>s", "<N>m <N>s", or "<N>h <N>m".
 */
export function formatUptime(ms: number): string {
  const totalSeconds = Math.floor(ms / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const totalHours = Math.floor(totalMinutes / 60);
  if (totalHours > 0) return `${totalHours}h ${totalMinutes % 60}m`;
  if (totalMinutes > 0) return `${totalMinutes}m ${totalSeconds % 60}s`;
  return `${totalSeconds}s`;
}
/** Render how long ago a timestamp occurred, e.g. "5m 3s ago". */
export function formatTimeAgo(timestamp: number): string {
  const elapsed = Date.now() - timestamp;
  return `${formatUptime(elapsed)} ago`;
}
/**
 * Compact token-count formatter: "999", "1.5k", "123k", "2.5M", "42M".
 * One decimal place is kept only in the 1k–10k and 1M–10M bands.
 */
export function formatTokenCount(count: number): string {
  const thousands = count / 1000;
  const millions = count / 1000000;
  if (count < 1000) return count.toString();
  if (count < 10000) return `${thousands.toFixed(1)}k`;
  if (count < 1000000) return `${Math.round(thousands)}k`;
  if (count < 10000000) return `${millions.toFixed(1)}M`;
  return `${Math.round(millions)}M`;
}

View file

@ -8,10 +8,11 @@
* Diagnostic extraction is handled by session-forensics.ts.
*/
import { writeFileSync, mkdirSync, readdirSync, unlinkSync, statSync } from "node:fs";
import { existsSync } from "node:fs";
import { writeFileSync, mkdirSync, readdirSync, unlinkSync, statSync, openSync, closeSync, constants } from "node:fs";
import { createHash } from "node:crypto";
import { join } from "node:path";
const SEQ_PREFIX_RE = /^(\d+)-/;
import type { ExtensionContext } from "@gsd/pi-coding-agent";
import { gsdRoot } from "./paths.js";
@ -26,10 +27,11 @@ function scanNextSequence(activityDir: string): number {
let maxSeq = 0;
try {
for (const f of readdirSync(activityDir)) {
const match = f.match(/^(\d+)-/);
const match = f.match(SEQ_PREFIX_RE);
if (match) maxSeq = Math.max(maxSeq, parseInt(match[1], 10));
}
} catch {
} catch (e) {
void e; /* directory not readable — start at 1 */
return 1;
}
return maxSeq + 1;
@ -55,14 +57,24 @@ function nextActivityFilePath(
unitType: string,
safeUnitId: string,
): string {
while (true) {
// Use O_CREAT | O_EXCL for atomic "create if absent" — no directory scan needed.
for (let attempts = 0; attempts < 1000; attempts++) {
const seq = String(state.nextSeq).padStart(3, "0");
const filePath = join(activityDir, `${seq}-${unitType}-${safeUnitId}.jsonl`);
if (!existsSync(filePath)) {
try {
const fd = openSync(filePath, constants.O_CREAT | constants.O_EXCL | constants.O_WRONLY);
closeSync(fd);
return filePath;
} catch (err: any) {
if (err?.code === "EEXIST") {
state.nextSeq++;
continue;
}
throw err;
}
state.nextSeq = scanNextSequence(activityDir);
}
// Fallback: should never reach here in practice
throw new Error(`Failed to find available activity log sequence in ${activityDir}`);
}
export function saveActivityLog(
@ -89,8 +101,9 @@ export function saveActivityLog(
writeFileSync(filePath, content, "utf-8");
state.nextSeq += 1;
state.lastSnapshotKeyByUnit.set(unitKey, key);
} catch {
} catch (e) {
// Don't let logging failures break auto-mode
void e;
}
}
@ -99,7 +112,7 @@ export function pruneActivityLogs(activityDir: string, retentionDays: number): v
const files = readdirSync(activityDir);
const entries: { seq: number; filePath: string }[] = [];
for (const f of files) {
const match = f.match(/^(\d+)-/);
const match = f.match(SEQ_PREFIX_RE);
if (match) entries.push({ seq: parseInt(match[1], 10), filePath: join(activityDir, f) });
}
if (entries.length === 0) return;

View file

@ -0,0 +1,450 @@
/**
 * Auto-mode Dashboard — progress widget rendering, elapsed time formatting,
 * unit description helpers, and slice progress caching.
 *
 * Pure functions that accept specific parameters — no module-level globals
 * or AutoContext dependency. State accessors are passed as callbacks.
*/
import type { ExtensionContext, ExtensionCommandContext } from "@gsd/pi-coding-agent";
import type { GSDState } from "./types.js";
import { getCurrentBranch } from "./worktree.js";
import { getActiveHook } from "./post-unit-hooks.js";
import { getLedger, getProjectTotals, formatCost, formatTokenCount } from "./metrics.js";
import {
resolveMilestoneFile,
resolveSliceFile,
} from "./paths.js";
import { parseRoadmap, parsePlan } from "./files.js";
import { readFileSync, existsSync } from "node:fs";
import { truncateToWidth, visibleWidth } from "@gsd/pi-tui";
import { makeUI, GLYPH, INDENT } from "../shared/ui.js";
// ─── Dashboard Data ───────────────────────────────────────────────────────────
/** Dashboard data for the overlay */
export interface AutoDashboardData {
  /** Whether auto-mode is currently running */
  active: boolean;
  /** Whether auto-mode is paused */
  paused: boolean;
  /** Whether single-step ("NEXT") mode is engaged */
  stepMode: boolean;
  /** When auto-mode started */
  startTime: number;
  /** Elapsed time since start, in ms */
  elapsed: number;
  /** Unit currently executing, or null between units */
  currentUnit: { type: string; id: string; startedAt: number } | null;
  /** Units finished so far, with their start/finish timestamps */
  completedUnits: { type: string; id: string; startedAt: number; finishedAt: number }[];
  /** Project base path */
  basePath: string;
  /** Running cost and token totals from metrics ledger */
  totalCost: number;
  totalTokens: number;
  /** Projected remaining cost based on unit-type averages (undefined if insufficient data) */
  projectedRemainingCost?: number;
  /** Whether token profile has been auto-downgraded due to budget prediction */
  profileDowngraded?: boolean;
}
// ─── Unit Description Helpers ─────────────────────────────────────────────────
/**
 * Present-tense verb phrase for a unit type (e.g. "executing").
 * Hook units render as "hook: <name>"; unknown types pass through unchanged.
 */
export function unitVerb(unitType: string): string {
  if (unitType.startsWith("hook/")) return `hook: ${unitType.slice(5)}`;
  const VERBS: Record<string, string> = {
    "research-milestone": "researching",
    "research-slice": "researching",
    "plan-milestone": "planning",
    "plan-slice": "planning",
    "execute-task": "executing",
    "complete-slice": "completing",
    "replan-slice": "replanning",
    "rewrite-docs": "rewriting",
    "reassess-roadmap": "reassessing",
    "run-uat": "running UAT",
  };
  return VERBS[unitType] ?? unitType;
}
/**
 * Uppercase phase badge for a unit type (e.g. "EXECUTE").
 * Hook units render as "HOOK"; unknown types are uppercased verbatim.
 */
export function unitPhaseLabel(unitType: string): string {
  if (unitType.startsWith("hook/")) return "HOOK";
  const LABELS: Record<string, string> = {
    "research-milestone": "RESEARCH",
    "research-slice": "RESEARCH",
    "plan-milestone": "PLAN",
    "plan-slice": "PLAN",
    "execute-task": "EXECUTE",
    "complete-slice": "COMPLETE",
    "replan-slice": "REPLAN",
    "rewrite-docs": "REWRITE",
    "reassess-roadmap": "REASSESS",
    "run-uat": "UAT",
  };
  return LABELS[unitType] ?? unitType.toUpperCase();
}
/**
 * Preview of what comes after the current unit, for the "then …" hint line.
 * An active post-unit hook takes precedence over the unit-type lookup.
 */
function peekNext(unitType: string, state: GSDState): string {
  // Show active hook info in progress display
  const hook = getActiveHook();
  if (hook) {
    return `hook: ${hook.hookName} (cycle ${hook.cycle})`;
  }
  const sid = state.activeSlice?.id ?? "";
  if (unitType.startsWith("hook/")) return `continue ${sid}`;
  const NEXT_STEP: Record<string, string> = {
    "research-milestone": "plan milestone roadmap",
    "plan-milestone": "plan or execute first slice",
    "research-slice": `plan ${sid}`,
    "plan-slice": "execute first task",
    "execute-task": `continue ${sid}`,
    "complete-slice": "reassess roadmap",
    "replan-slice": `re-execute ${sid}`,
    "rewrite-docs": "continue execution",
    "reassess-roadmap": "advance to next slice",
    "run-uat": "reassess roadmap",
  };
  return NEXT_STEP[unitType] ?? "";
}
/**
 * Describe what the next unit will be, based on current state.
 * Returns a short label plus a one-line description for display.
 */
export function describeNextUnit(state: GSDState): { label: string; description: string } {
  const sid = state.activeSlice?.id;
  const sTitle = state.activeSlice?.title;
  const tid = state.activeTask?.id;
  const tTitle = state.activeTask?.title;
  // Thunks so interpolated labels are only built for the matched phase.
  const byPhase: Record<string, () => { label: string; description: string }> = {
    "needs-discussion": () => ({ label: "Discuss milestone draft", description: "Milestone has a draft context — needs discussion before planning." }),
    "pre-planning": () => ({ label: "Research & plan milestone", description: "Scout the landscape and create the roadmap." }),
    "planning": () => ({ label: `Plan ${sid}: ${sTitle}`, description: "Research and decompose into tasks." }),
    "executing": () => ({ label: `Execute ${tid}: ${tTitle}`, description: "Run the next task in a fresh session." }),
    "summarizing": () => ({ label: `Complete ${sid}: ${sTitle}`, description: "Write summary, UAT, and merge to main." }),
    "replanning-slice": () => ({ label: `Replan ${sid}: ${sTitle}`, description: "Blocker found — replan the slice." }),
    "completing-milestone": () => ({ label: "Complete milestone", description: "Write milestone summary." }),
  };
  const build = byPhase[state.phase];
  return build ? build() : { label: "Continue", description: "Execute the next step." };
}
// ─── Elapsed Time Formatting ──────────────────────────────────────────────────
/**
 * Format elapsed time since auto-mode started.
 * Returns "" when auto-mode has not started (falsy timestamp).
 * Shapes: "Ns", "Nm", "Nm Ns", "Nh Nm".
 */
export function formatAutoElapsed(autoStartTime: number): string {
  if (!autoStartTime) return "";
  const totalSeconds = Math.floor((Date.now() - autoStartTime) / 1000);
  if (totalSeconds < 60) return `${totalSeconds}s`;
  const totalMinutes = Math.floor(totalSeconds / 60);
  const leftoverSeconds = totalSeconds % 60;
  if (totalMinutes < 60) {
    return leftoverSeconds > 0 ? `${totalMinutes}m ${leftoverSeconds}s` : `${totalMinutes}m`;
  }
  return `${Math.floor(totalMinutes / 60)}h ${totalMinutes % 60}m`;
}
/** Format token counts for compact display: "999", "1.5k", "123k", "2.5M", "42M". */
export function formatWidgetTokens(count: number): string {
  if (count < 1_000) return String(count);
  if (count < 10_000) return (count / 1_000).toFixed(1) + "k";
  if (count < 1_000_000) return Math.round(count / 1_000) + "k";
  if (count < 10_000_000) return (count / 1_000_000).toFixed(1) + "M";
  return Math.round(count / 1_000_000) + "M";
}
// ─── Slice Progress Cache ─────────────────────────────────────────────────────
/** Cached slice progress for the widget — avoid async in render */
let cachedSliceProgress: {
  done: number;
  total: number;
  milestoneId: string;
  /** Real task progress for the active slice, if its plan file exists */
  activeSliceTasks: { done: number; total: number } | null;
} | null = null;
/**
 * Refresh the cached slice/task progress by re-reading the milestone ROADMAP
 * (and, when `activeSid` is given, the active slice's PLAN) from disk.
 * All failures are swallowed — on error the widget simply shows no progress bar.
 *
 * @param base      project base path
 * @param mid       milestone id whose ROADMAP file is parsed
 * @param activeSid active slice id; when set, per-task counts come from its PLAN
 */
export function updateSliceProgressCache(base: string, mid: string, activeSid?: string): void {
  try {
    const roadmapFile = resolveMilestoneFile(base, mid, "ROADMAP");
    // No roadmap file → leave whatever was cached before untouched.
    if (!roadmapFile) return;
    const content = readFileSync(roadmapFile, "utf-8");
    const roadmap = parseRoadmap(content);
    let activeSliceTasks: { done: number; total: number } | null = null;
    if (activeSid) {
      try {
        const planFile = resolveSliceFile(base, mid, activeSid, "PLAN");
        if (planFile && existsSync(planFile)) {
          const planContent = readFileSync(planFile, "utf-8");
          const plan = parsePlan(planContent);
          activeSliceTasks = {
            done: plan.tasks.filter(t => t.done).length,
            total: plan.tasks.length,
          };
        }
      } catch {
        // Non-fatal — just omit task count
      }
    }
    cachedSliceProgress = {
      done: roadmap.slices.filter(s => s.done).length,
      total: roadmap.slices.length,
      milestoneId: mid,
      activeSliceTasks,
    };
  } catch {
    // Non-fatal — widget just won't show progress bar
  }
}
/** Synchronous accessor for the cached progress (null until the first update). */
export function getRoadmapSlicesSync(): { done: number; total: number; activeSliceTasks: { done: number; total: number } | null } | null {
  return cachedSliceProgress;
}
/** Drop the cached progress. */
export function clearSliceProgressCache(): void {
  cachedSliceProgress = null;
}
// ─── Footer Factory ───────────────────────────────────────────────────────────
/**
 * Footer factory that renders zero lines — hides the built-in footer entirely.
 * All footer info (pwd, branch, tokens, cost, model) is shown inside the
 * progress widget instead, so there's no gap or redundancy.
 */
export const hideFooter = () => ({
  render: (_width: number): string[] => [],
  invalidate: (): void => {},
  dispose: (): void => {},
});
// ─── Progress Widget ──────────────────────────────────────────────────────────
/** State accessors passed to updateProgressWidget to avoid direct global access */
export interface WidgetStateAccessors {
  /** When auto-mode started; falsy when not running (elapsed shows "") */
  getAutoStartTime(): number;
  /** True when in single-step ("NEXT") mode instead of continuous AUTO */
  isStepMode(): boolean;
  /** Current command context, or null when none is active */
  getCmdCtx(): ExtensionCommandContext | null;
  /** Project base path (used for git branch and progress-cache refresh) */
  getBasePath(): string;
  /** Whether verbose output is enabled */
  isVerbose(): boolean;
}
/**
 * Install (or replace) the "gsd-progress" UI widget showing the current
 * auto-mode unit, milestone/slice progress, token/cost stats, and key hints.
 *
 * Branch and pwd are captured once at widget creation; a pulse timer toggles
 * the status dot every 800ms and a 5s timer refreshes the slice-progress
 * cache from disk so the bar moves mid-unit.
 *
 * @param ctx       extension context providing the UI surface (no-op without UI)
 * @param unitType  unit type being dispatched (drives verb/phase labels)
 * @param unitId    unit identifier shown when no active task exists
 * @param state     current GSD state (milestone/slice/task)
 * @param accessors callbacks into auto-mode state (start time, step mode, cmd ctx)
 */
export function updateProgressWidget(
  ctx: ExtensionContext,
  unitType: string,
  unitId: string,
  state: GSDState,
  accessors: WidgetStateAccessors,
): void {
  if (!ctx.hasUI) return;
  const verb = unitVerb(unitType);
  const phaseLabel = unitPhaseLabel(unitType);
  const mid = state.activeMilestone;
  const slice = state.activeSlice;
  const task = state.activeTask;
  const next = peekNext(unitType, state);
  // Cache git branch at widget creation time (not per render)
  let cachedBranch: string | null = null;
  try { cachedBranch = getCurrentBranch(accessors.getBasePath()); } catch { /* not in git repo */ }
  // Cache pwd with ~ substitution
  let widgetPwd = process.cwd();
  const widgetHome = process.env.HOME || process.env.USERPROFILE;
  if (widgetHome && widgetPwd.startsWith(widgetHome)) {
    widgetPwd = `~${widgetPwd.slice(widgetHome.length)}`;
  }
  if (cachedBranch) widgetPwd = `${widgetPwd} (${cachedBranch})`;
  ctx.ui.setWidget("gsd-progress", (tui, theme) => {
    let pulseBright = true;
    let cachedLines: string[] | undefined;
    let cachedWidth: number | undefined;
    const pulseTimer = setInterval(() => {
      pulseBright = !pulseBright;
      cachedLines = undefined;
      tui.requestRender();
    }, 800);
    // Refresh progress cache from disk every 5s so the widget reflects
    // task/slice completion mid-unit. Without this, the progress bar only
    // updates at dispatch time, appearing frozen during long-running units.
    const progressRefreshTimer = mid ? setInterval(() => {
      try {
        updateSliceProgressCache(accessors.getBasePath(), mid.id, slice?.id);
        cachedLines = undefined;
      } catch { /* non-fatal */ }
    }, 5_000) : null;
    return {
      render(width: number): string[] {
        // Re-render only when invalidated or the terminal width changed.
        if (cachedLines && cachedWidth === width) return cachedLines;
        const ui = makeUI(theme, width);
        const lines: string[] = [];
        const pad = INDENT.base;
        // ── Line 1: Top bar ───────────────────────────────────────────────
        lines.push(...ui.bar());
        const dot = pulseBright
          ? theme.fg("accent", GLYPH.statusActive)
          : theme.fg("dim", GLYPH.statusPending);
        const elapsed = formatAutoElapsed(accessors.getAutoStartTime());
        const modeTag = accessors.isStepMode() ? "NEXT" : "AUTO";
        const headerLeft = `${pad}${dot} ${theme.fg("accent", theme.bold("GSD"))} ${theme.fg("success", modeTag)}`;
        const headerRight = elapsed ? theme.fg("dim", elapsed) : "";
        lines.push(rightAlign(headerLeft, headerRight, width));
        lines.push("");
        if (mid) {
          lines.push(truncateToWidth(`${pad}${theme.fg("dim", mid.title)}`, width));
        }
        // Slice line is redundant during milestone-level units — skip it there.
        if (slice && unitType !== "research-milestone" && unitType !== "plan-milestone") {
          lines.push(truncateToWidth(
            `${pad}${theme.fg("text", theme.bold(`${slice.id}: ${slice.title}`))}`,
            width,
          ));
        }
        lines.push("");
        const target = task ? `${task.id}: ${task.title}` : unitId;
        const actionLeft = `${pad}${theme.fg("accent", "▸")} ${theme.fg("accent", verb)} ${theme.fg("text", target)}`;
        const phaseBadge = theme.fg("dim", phaseLabel);
        lines.push(rightAlign(actionLeft, phaseBadge, width));
        lines.push("");
        if (mid) {
          const roadmapSlices = getRoadmapSlicesSync();
          if (roadmapSlices) {
            const { done, total, activeSliceTasks } = roadmapSlices;
            // Bar is ~30% of the terminal width, clamped to 8–24 cells.
            const barWidth = Math.max(8, Math.min(24, Math.floor(width * 0.3)));
            const pct = total > 0 ? done / total : 0;
            const filled = Math.round(pct * barWidth);
            const bar = theme.fg("success", "█".repeat(filled))
              + theme.fg("dim", "░".repeat(barWidth - filled));
            let meta = theme.fg("dim", `${done}/${total} slices`);
            if (activeSliceTasks && activeSliceTasks.total > 0) {
              meta += theme.fg("dim", ` · task ${activeSliceTasks.done + 1}/${activeSliceTasks.total}`);
            }
            lines.push(truncateToWidth(`${pad}${bar} ${meta}`, width));
          }
        }
        lines.push("");
        if (next) {
          lines.push(truncateToWidth(
            `${pad}${theme.fg("dim", "→")} ${theme.fg("dim", `then ${next}`)}`,
            width,
          ));
        }
        // ── Footer info (pwd, tokens, cost, context, model) ──────────────
        lines.push("");
        lines.push(truncateToWidth(theme.fg("dim", `${pad}${widgetPwd}`), width, theme.fg("dim", "…")));
        // Token stats from current unit session + cumulative cost from metrics
        {
          const cmdCtx = accessors.getCmdCtx();
          let totalInput = 0, totalOutput = 0;
          let totalCacheRead = 0, totalCacheWrite = 0;
          if (cmdCtx) {
            // Sum usage across all assistant messages in the session.
            for (const entry of cmdCtx.sessionManager.getEntries()) {
              if (entry.type === "message" && (entry as any).message?.role === "assistant") {
                const u = (entry as any).message.usage;
                if (u) {
                  totalInput += u.input || 0;
                  totalOutput += u.output || 0;
                  totalCacheRead += u.cacheRead || 0;
                  totalCacheWrite += u.cacheWrite || 0;
                }
              }
            }
          }
          const mLedger = getLedger();
          const autoTotals = mLedger ? getProjectTotals(mLedger.units) : null;
          const cumulativeCost = autoTotals?.cost ?? 0;
          const cxUsage = cmdCtx?.getContextUsage?.();
          const cxWindow = cxUsage?.contextWindow ?? cmdCtx?.model?.contextWindow ?? 0;
          const cxPctVal = cxUsage?.percent ?? 0;
          // FIX: use `!= null` so an *undefined* percent (no cmdCtx, or
          // getContextUsage absent) also falls to the "?" display. The old
          // `!== null` check rendered "0.0%" in that case.
          const cxPct = cxUsage?.percent != null ? cxPctVal.toFixed(1) : "?";
          const sp: string[] = [];
          if (totalInput) sp.push(`${formatWidgetTokens(totalInput)}`);
          if (totalOutput) sp.push(`${formatWidgetTokens(totalOutput)}`);
          if (totalCacheRead) sp.push(`R${formatWidgetTokens(totalCacheRead)}`);
          if (totalCacheWrite) sp.push(`W${formatWidgetTokens(totalCacheWrite)}`);
          if (cumulativeCost) sp.push(`$${cumulativeCost.toFixed(3)}`);
          const cxDisplay = cxPct === "?"
            ? `?/${formatWidgetTokens(cxWindow)}`
            : `${cxPct}%/${formatWidgetTokens(cxWindow)}`;
          // Color the context-usage figure by severity.
          if (cxPctVal > 90) {
            sp.push(theme.fg("error", cxDisplay));
          } else if (cxPctVal > 70) {
            sp.push(theme.fg("warning", cxDisplay));
          } else {
            sp.push(cxDisplay);
          }
          // Dim any part that isn't already ANSI-colored.
          const sLeft = sp.map(p => p.includes("\x1b[") ? p : theme.fg("dim", p))
            .join(theme.fg("dim", " "));
          const modelId = cmdCtx?.model?.id ?? "";
          const modelProvider = cmdCtx?.model?.provider ?? "";
          const modelPhase = phaseLabel ? theme.fg("dim", `[${phaseLabel}] `) : "";
          const modelDisplay = modelProvider && modelId
            ? `${modelProvider}/${modelId}`
            : modelId;
          const sRight = modelDisplay
            ? `${modelPhase}${theme.fg("dim", modelDisplay)}`
            : "";
          lines.push(rightAlign(`${pad}${sLeft}`, sRight, width));
        }
        const hintParts: string[] = [];
        hintParts.push("esc pause");
        hintParts.push(process.platform === "darwin" ? "⌃⌥G dashboard" : "Ctrl+Alt+G dashboard");
        lines.push(...ui.hints(hintParts));
        lines.push(...ui.bar());
        cachedLines = lines;
        cachedWidth = width;
        return lines;
      },
      invalidate() {
        cachedLines = undefined;
        cachedWidth = undefined;
      },
      dispose() {
        clearInterval(pulseTimer);
        if (progressRefreshTimer) clearInterval(progressRefreshTimer);
      },
    };
  });
}
// ─── Right-align Helper ───────────────────────────────────────────────────────
/**
 * Build a single line with `left` flush-left and `right` flush-right,
 * separated by at least one space, truncated to the terminal width.
 */
function rightAlign(left: string, right: string, width: number): string {
  const occupied = visibleWidth(left) + visibleWidth(right);
  const spacer = " ".repeat(Math.max(1, width - occupied));
  return truncateToWidth(`${left}${spacer}${right}`, width);
}

View file

@ -0,0 +1,294 @@
/**
 * Auto-mode Dispatch Table — declarative phase → unit mapping.
*
* Each rule maps a GSD state to the unit type, unit ID, and prompt builder
* that should be dispatched. Rules are evaluated in order; the first match wins.
*
* This replaces the 130-line if-else chain in dispatchNextUnit with a
* data structure that is inspectable, testable per-rule, and extensible
* without modifying orchestration code.
*/
import type { GSDState } from "./types.js";
import type { GSDPreferences } from "./preferences.js";
import type { UatType } from "./files.js";
import { loadFile, extractUatType, loadActiveOverrides } from "./files.js";
import {
resolveMilestoneFile, resolveSliceFile,
relSliceFile,
} from "./paths.js";
import {
buildResearchMilestonePrompt,
buildPlanMilestonePrompt,
buildResearchSlicePrompt,
buildPlanSlicePrompt,
buildExecuteTaskPrompt,
buildCompleteSlicePrompt,
buildCompleteMilestonePrompt,
buildReplanSlicePrompt,
buildRunUatPrompt,
buildReassessRoadmapPrompt,
buildRewriteDocsPrompt,
checkNeedsReassessment,
checkNeedsRunUat,
} from "./auto-prompts.js";
// ─── Types ────────────────────────────────────────────────────────────────
/**
 * Outcome of evaluating the dispatch rules:
 * - "dispatch": run a unit with the given prompt (optionally pausing after it)
 * - "stop": halt auto-mode with a reason at the given severity
 * - "skip": nothing to dispatch this cycle
 */
export type DispatchAction =
  | { action: "dispatch"; unitType: string; unitId: string; prompt: string; pauseAfterDispatch?: boolean }
  | { action: "stop"; reason: string; level: "info" | "warning" | "error" }
  | { action: "skip" };
/** Inputs available to every dispatch rule. */
export interface DispatchContext {
  /** Project base path */
  basePath: string;
  /** Active milestone id */
  mid: string;
  /** Active milestone title */
  midTitle: string;
  /** Current GSD state (phase, active slice/task) */
  state: GSDState;
  /** User preferences (phase skips etc.); may be undefined */
  prefs: GSDPreferences | undefined;
}
interface DispatchRule {
  /** Human-readable name for debugging and test identification */
  name: string;
  /** Return a DispatchAction if this rule matches, null to fall through */
  match: (ctx: DispatchContext) => Promise<DispatchAction | null>;
}
// ─── Rewrite Circuit Breaker ──────────────────────────────────────────────
/** Max rewrite-docs dispatches before pending overrides are force-resolved. */
const MAX_REWRITE_ATTEMPTS = 3;
// Count of rewrite-docs dispatches since the last reset.
let rewriteAttemptCount = 0;
/** Reset the rewrite-docs attempt counter. */
export function resetRewriteCircuitBreaker(): void {
  rewriteAttemptCount = 0;
}
// ─── Rules ────────────────────────────────────────────────────────────────
const DISPATCH_RULES: DispatchRule[] = [
{
name: "rewrite-docs (override gate)",
match: async ({ mid, midTitle, state, basePath }) => {
const pendingOverrides = await loadActiveOverrides(basePath);
if (pendingOverrides.length === 0) return null;
if (rewriteAttemptCount >= MAX_REWRITE_ATTEMPTS) {
const { resolveAllOverrides } = await import("./files.js");
await resolveAllOverrides(basePath);
rewriteAttemptCount = 0;
return null;
}
rewriteAttemptCount++;
const unitId = state.activeSlice ? `${mid}/${state.activeSlice.id}` : mid;
return {
action: "dispatch",
unitType: "rewrite-docs",
unitId,
prompt: await buildRewriteDocsPrompt(mid, midTitle, state.activeSlice, basePath, pendingOverrides),
};
},
},
{
name: "summarizing → complete-slice",
match: async ({ state, mid, midTitle, basePath }) => {
if (state.phase !== "summarizing") return null;
const sid = state.activeSlice!.id;
const sTitle = state.activeSlice!.title;
return {
action: "dispatch",
unitType: "complete-slice",
unitId: `${mid}/${sid}`,
prompt: await buildCompleteSlicePrompt(mid, midTitle, sid, sTitle, basePath),
};
},
},
{
name: "run-uat (post-completion)",
match: async ({ state, mid, basePath, prefs }) => {
const needsRunUat = await checkNeedsRunUat(basePath, mid, state, prefs);
if (!needsRunUat) return null;
const { sliceId, uatType } = needsRunUat;
const uatFile = resolveSliceFile(basePath, mid, sliceId, "UAT")!;
const uatContent = await loadFile(uatFile);
return {
action: "dispatch",
unitType: "run-uat",
unitId: `${mid}/${sliceId}`,
prompt: await buildRunUatPrompt(
mid, sliceId, relSliceFile(basePath, mid, sliceId, "UAT"), uatContent ?? "", basePath,
),
pauseAfterDispatch: uatType !== "artifact-driven",
};
},
},
{
name: "reassess-roadmap (post-completion)",
match: async ({ state, mid, midTitle, basePath, prefs }) => {
// Phase skip: skip reassess when preference or profile says so
if (prefs?.phases?.skip_reassess) return null;
const needsReassess = await checkNeedsReassessment(basePath, mid, state);
if (!needsReassess) return null;
return {
action: "dispatch",
unitType: "reassess-roadmap",
unitId: `${mid}/${needsReassess.sliceId}`,
prompt: await buildReassessRoadmapPrompt(mid, midTitle, needsReassess.sliceId, basePath),
};
},
},
{
name: "needs-discussion → stop",
match: async ({ state, mid, midTitle }) => {
if (state.phase !== "needs-discussion") return null;
return {
action: "stop",
reason: `${mid}: ${midTitle} has draft context from a prior discussion — needs its own discussion before planning.\nRun /gsd to discuss.`,
level: "warning",
};
},
},
{
  name: "pre-planning (no context) → stop",
  match: async ({ state, mid, basePath }) => {
    if (state.phase !== "pre-planning") return null;
    const contextFile = resolveMilestoneFile(basePath, mid, "CONTEXT");
    // Context counts as present only if the file resolves AND has content.
    const hasContext = !!(contextFile && await loadFile(contextFile));
    if (hasContext) return null; // fall through to next rule
    return {
      action: "stop",
      reason: "No context or roadmap yet. Run /gsd to discuss first.",
      level: "warning",
    };
  },
},
{
  name: "pre-planning (no research) → research-milestone",
  match: async ({ state, mid, midTitle, basePath, prefs }) => {
    if (state.phase !== "pre-planning") return null;
    // Phase skip: skip research when preference or profile says so
    if (prefs?.phases?.skip_research) return null;
    const researchFile = resolveMilestoneFile(basePath, mid, "RESEARCH");
    if (researchFile) return null; // has research, fall through
    return {
      action: "dispatch",
      unitType: "research-milestone",
      unitId: mid,
      prompt: await buildResearchMilestonePrompt(mid, midTitle, basePath),
    };
  },
},
{
  // Reached when research exists OR was skipped by preference — either way
  // pre-planning proceeds to milestone planning.
  name: "pre-planning (has research) → plan-milestone",
  match: async ({ state, mid, midTitle, basePath }) => {
    if (state.phase !== "pre-planning") return null;
    return {
      action: "dispatch",
      unitType: "plan-milestone",
      unitId: mid,
      prompt: await buildPlanMilestonePrompt(mid, midTitle, basePath),
    };
  },
},
{
  name: "planning (no research, not S01) → research-slice",
  match: async ({ state, mid, midTitle, basePath, prefs }) => {
    if (state.phase !== "planning") return null;
    // Phase skip: skip research when preference or profile says so
    if (prefs?.phases?.skip_research || prefs?.phases?.skip_slice_research) return null;
    // The `!` assumes the planning phase always carries an active slice —
    // presumably guaranteed upstream; TODO confirm against state management.
    const sid = state.activeSlice!.id;
    const sTitle = state.activeSlice!.title;
    const researchFile = resolveSliceFile(basePath, mid, sid, "RESEARCH");
    if (researchFile) return null; // has research, fall through
    // Skip slice research for S01 when milestone research already exists —
    // the milestone research already covers the same ground for the first slice.
    const milestoneResearchFile = resolveMilestoneFile(basePath, mid, "RESEARCH");
    if (milestoneResearchFile && sid === "S01") return null; // fall through to plan-slice
    return {
      action: "dispatch",
      unitType: "research-slice",
      unitId: `${mid}/${sid}`,
      prompt: await buildResearchSlicePrompt(mid, midTitle, sid, sTitle, basePath),
    };
  },
},
{
  name: "planning → plan-slice",
  match: async ({ state, mid, midTitle, basePath }) => {
    if (state.phase !== "planning") return null;
    const sid = state.activeSlice!.id;
    const sTitle = state.activeSlice!.title;
    return {
      action: "dispatch",
      unitType: "plan-slice",
      unitId: `${mid}/${sid}`,
      prompt: await buildPlanSlicePrompt(mid, midTitle, sid, sTitle, basePath),
    };
  },
},
{
  name: "replanning-slice → replan-slice",
  match: async ({ state, mid, midTitle, basePath }) => {
    if (state.phase !== "replanning-slice") return null;
    const sid = state.activeSlice!.id;
    const sTitle = state.activeSlice!.title;
    return {
      action: "dispatch",
      unitType: "replan-slice",
      unitId: `${mid}/${sid}`,
      prompt: await buildReplanSlicePrompt(mid, midTitle, sid, sTitle, basePath),
    };
  },
},
{
  name: "executing → execute-task",
  match: async ({ state, mid, basePath }) => {
    // Requires both the executing phase and an active task on the state.
    if (state.phase !== "executing" || !state.activeTask) return null;
    const sid = state.activeSlice!.id;
    const sTitle = state.activeSlice!.title;
    const tid = state.activeTask.id;
    const tTitle = state.activeTask.title;
    return {
      action: "dispatch",
      unitType: "execute-task",
      // Task units are addressed by the full milestone/slice/task path.
      unitId: `${mid}/${sid}/${tid}`,
      prompt: await buildExecuteTaskPrompt(mid, sid, sTitle, tid, tTitle, basePath),
    };
  },
},
{
  name: "completing-milestone → complete-milestone",
  match: async ({ state, mid, midTitle, basePath }) => {
    if (state.phase !== "completing-milestone") return null;
    return {
      action: "dispatch",
      unitType: "complete-milestone",
      unitId: mid,
      prompt: await buildCompleteMilestonePrompt(mid, midTitle, basePath),
    };
  },
},
];
// ─── Resolver ─────────────────────────────────────────────────────────────
/**
 * Evaluate dispatch rules in order and return the first matching action.
 * When no rule claims the current phase, a "stop" action is returned so
 * the caller halts instead of dispatching blindly.
 */
export async function resolveDispatch(ctx: DispatchContext): Promise<DispatchAction> {
  for (const rule of DISPATCH_RULES) {
    const matched = await rule.match(ctx);
    if (matched) {
      return matched;
    }
  }
  // Every rule declined — this phase has no handler.
  return {
    action: "stop",
    reason: `Unhandled phase "${ctx.state.phase}" — run /gsd doctor to diagnose.`,
    level: "info",
  };
}
/** Exposed for testing — returns the rule names in evaluation order. */
export function getDispatchRuleNames(): string[] {
  const names: string[] = [];
  for (const rule of DISPATCH_RULES) {
    names.push(rule.name);
  }
  return names;
}

View file

@ -0,0 +1,929 @@
/**
 * Auto-mode Prompt Builders — construct dispatch prompts for each unit type.
 *
 * Pure async functions that load templates and inline file content. No module-level
 * state, no globals — every dependency is passed as a parameter or imported as a
 * utility.
 */
import { loadFile, parseContinue, parsePlan, parseRoadmap, parseSummary, extractUatType, loadActiveOverrides, formatOverridesSection } from "./files.js";
import type { Override, UatType } from "./files.js";
import { loadPrompt, inlineTemplate } from "./prompt-loader.js";
import {
resolveMilestoneFile, resolveSliceFile, resolveSlicePath,
resolveTasksDir, resolveTaskFiles, resolveTaskFile,
relMilestoneFile, relSliceFile, relSlicePath, relMilestonePath,
resolveGsdRootFile, relGsdRootFile,
} from "./paths.js";
import { resolveSkillDiscoveryMode, resolveInlineLevel } from "./preferences.js";
import type { GSDState, InlineLevel } from "./types.js";
import type { GSDPreferences } from "./preferences.js";
import { join } from "node:path";
import { existsSync } from "node:fs";
// ─── Inline Helpers ───────────────────────────────────────────────────────
/**
 * Load a file and format it for inlining into a prompt.
 *
 * The content is wrapped with a label and source-path header; when the file
 * is missing, a placeholder is returned instead so the prompt still renders.
 * Inlining eliminates tool calls — the LLM receives the content directly
 * instead of being told "Read this file:".
 */
export async function inlineFile(
  absPath: string | null, relPath: string, label: string,
): Promise<string> {
  const header = `### ${label}\nSource: \`${relPath}\``;
  const content = absPath ? await loadFile(absPath) : null;
  if (content) {
    return `${header}\n\n${content.trim()}`;
  }
  return `${header}\n\n_(not found — file does not exist yet)_`;
}
/**
 * Load a file for inlining, returning null when it doesn't exist.
 * Use for optional files that should be omitted from the prompt entirely
 * if absent.
 */
export async function inlineFileOptional(
  absPath: string | null, relPath: string, label: string,
): Promise<string | null> {
  const content = absPath ? await loadFile(absPath) : null;
  return content
    ? `### ${label}\nSource: \`${relPath}\`\n\n${content.trim()}`
    : null;
}
/**
 * Load and inline the SUMMARY files of every slice this slice depends on
 * (full content, not just paths). Duplicate dependency IDs are inlined
 * once; a placeholder bullet is emitted for each missing summary, and a
 * "(no dependencies)" marker when the roadmap or dependency list is empty.
 */
export async function inlineDependencySummaries(
  mid: string, sid: string, base: string,
): Promise<string> {
  const roadmapFile = resolveMilestoneFile(base, mid, "ROADMAP");
  const roadmapContent = roadmapFile ? await loadFile(roadmapFile) : null;
  if (!roadmapContent) return "- (no dependencies)";
  const entry = parseRoadmap(roadmapContent).slices.find(s => s.id === sid);
  if (!entry || entry.depends.length === 0) return "- (no dependencies)";
  const out: string[] = [];
  const inlinedIds = new Set<string>();
  for (const depId of entry.depends) {
    if (inlinedIds.has(depId)) continue;
    inlinedIds.add(depId);
    const absPath = resolveSliceFile(base, mid, depId, "SUMMARY");
    const content = absPath ? await loadFile(absPath) : null;
    const relPath = relSliceFile(base, mid, depId, "SUMMARY");
    out.push(content
      ? `#### ${depId} Summary\nSource: \`${relPath}\`\n\n${content.trim()}`
      : `- \`${relPath}\` _(not found)_`);
  }
  return out.join("\n\n");
}
/**
 * Load a well-known .gsd/ root file (e.g. project.md, knowledge.md) for
 * optional inlining. Performs the existsSync check internally and returns
 * null when the file is absent.
 */
export async function inlineGsdRootFile(
  base: string, filename: string, label: string,
): Promise<string | null> {
  type RootKey = "PROJECT" | "DECISIONS" | "QUEUE" | "STATE" | "REQUIREMENTS" | "KNOWLEDGE";
  // "knowledge.md" → "KNOWLEDGE"; the cast assumes callers only pass the
  // well-known filenames covered by RootKey.
  const key = filename.replace(/\.md$/i, "").toUpperCase() as RootKey;
  const absPath = resolveGsdRootFile(base, key);
  if (!existsSync(absPath)) return null;
  return inlineFileOptional(absPath, relGsdRootFile(key), label);
}
// ─── Skill Discovery ──────────────────────────────────────────────────────
/**
 * Build the skill discovery template variables for research prompts.
 * Returns { skillDiscoveryMode, skillDiscoveryInstructions } for template substitution.
 *
 * Modes (from resolveSkillDiscoveryMode): "off" disables the step entirely;
 * "auto" instructs the agent to install relevant skills; any other mode
 * instructs it to note skills without installing.
 */
export function buildSkillDiscoveryVars(): { skillDiscoveryMode: string; skillDiscoveryInstructions: string } {
  const mode = resolveSkillDiscoveryMode();
  if (mode === "off") {
    return {
      skillDiscoveryMode: "off",
      skillDiscoveryInstructions: " Skill discovery is disabled. Skip this step.",
    };
  }
  const autoInstall = mode === "auto";
  // NOTE(review): some prompt lines below read as if an em-dash was dropped
  // (e.g. "core technologies not tangentially related", "system prompts no
  // manual steps needed") — confirm against the original template before
  // editing; the text is emitted verbatim into dispatch prompts.
  const instructions = `
Identify the key technologies, frameworks, and services this work depends on (e.g. Stripe, Clerk, Supabase, JUCE, SwiftUI).
For each, check if a professional agent skill already exists:
- First check \`<available_skills>\` in your system prompt — a skill may already be installed.
- For technologies without an installed skill, run: \`npx skills find "<technology>"\`
- Only consider skills that are **directly relevant** to core technologies not tangentially related.
- Evaluate results by install count and relevance to the actual work.${autoInstall
? `
- Install relevant skills: \`npx skills add <owner/repo@skill> -g -y\`
- Record installed skills in the "Skills Discovered" section of your research output.
- Installed skills will automatically appear in subsequent units' system prompts no manual steps needed.`
: `
- Note promising skills in your research output with their install commands, but do NOT install them.
- The user will decide which to install.`
}`;
  return {
    skillDiscoveryMode: mode,
    skillDiscoveryInstructions: instructions,
  };
}
// ─── Text Helpers ──────────────────────────────────────────────────────────
/**
 * Extract the body of a `## <heading>` markdown section: the trimmed text
 * between the heading line and the next `## ` heading (or end of content).
 * Returns null when the heading is not present.
 */
export function extractMarkdownSection(content: string, heading: string): string | null {
  const headingRe = new RegExp(`^## ${escapeRegExp(heading)}\\s*$`, "m");
  const hit = headingRe.exec(content);
  if (!hit) return null;
  const body = content.slice(hit.index + hit[0].length);
  const next = body.match(/^##\s+/m);
  return body.slice(0, next?.index ?? body.length).trim();
}
/** Escape all regex metacharacters in `value` so it matches literally. */
export function escapeRegExp(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, (ch) => `\\${ch}`);
}
/** Collapse all whitespace runs (including newlines) into single spaces and trim. */
function oneLine(text: string): string {
  return text.split(/\s+/).filter(Boolean).join(" ");
}
// ─── Section Builders ──────────────────────────────────────────────────────
/**
 * Build the "## Resume State" prompt section from a continue file.
 * Prefers the new-style continue content, falling back to the legacy file;
 * when neither is available, instructs the agent to start from the top of
 * the task plan.
 */
export function buildResumeSection(
  continueContent: string | null,
  legacyContinueContent: string | null,
  continueRelPath: string,
  legacyContinueRelPath: string | null,
): string {
  const content = continueContent ?? legacyContinueContent;
  const relPath = continueContent ? continueRelPath : legacyContinueRelPath;
  if (!content || !relPath) {
    return ["## Resume State", "- No continue file present. Start from the top of the task plan."].join("\n");
  }
  const cont = parseContinue(content);
  const out: string[] = ["## Resume State", `Source: \`${relPath}\``];
  // `||` (not `??`) so an empty-string status also falls back.
  out.push(`- Status: ${cont.frontmatter.status || "in_progress"}`);
  if (cont.frontmatter.step && cont.frontmatter.totalSteps) {
    out.push(`- Progress: step ${cont.frontmatter.step} of ${cont.frontmatter.totalSteps}`);
  }
  if (cont.completedWork) out.push(`- Completed: ${oneLine(cont.completedWork)}`);
  if (cont.remainingWork) out.push(`- Remaining: ${oneLine(cont.remainingWork)}`);
  if (cont.decisions) out.push(`- Decisions: ${oneLine(cont.decisions)}`);
  if (cont.nextAction) out.push(`- Next action: ${oneLine(cont.nextAction)}`);
  return out.join("\n");
}
/**
 * Build the "## Carry-Forward Context" prompt section from prior task
 * summary files. Each summary is compressed to one bullet: title,
 * one-liner, and the first few provides/key_decisions/patterns/key_files
 * frontmatter entries, plus any Diagnostics section. Missing files are
 * listed as bare paths.
 */
export async function buildCarryForwardSection(priorSummaryPaths: string[], base: string): Promise<string> {
  if (priorSummaryPaths.length === 0) {
    return ["## Carry-Forward Context", "- No prior task summaries in this slice."].join("\n");
  }
  const items = await Promise.all(priorSummaryPaths.map(async (relPath) => {
    const absPath = join(base, relPath);
    const content = await loadFile(absPath);
    if (!content) return `- \`${relPath}\``;
    const summary = parseSummary(content);
    const provided = summary.frontmatter.provides.slice(0, 2).join("; ");
    const decisions = summary.frontmatter.key_decisions.slice(0, 2).join("; ");
    const patterns = summary.frontmatter.patterns_established.slice(0, 2).join("; ");
    const keyFiles = summary.frontmatter.key_files.slice(0, 3).join("; ");
    const diagnostics = extractMarkdownSection(content, "Diagnostics");
    const parts = [summary.title || relPath];
    if (summary.oneLiner) parts.push(summary.oneLiner);
    if (provided) parts.push(`provides: ${provided}`);
    if (decisions) parts.push(`decisions: ${decisions}`);
    if (patterns) parts.push(`patterns: ${patterns}`);
    if (keyFiles) parts.push(`key_files: ${keyFiles}`);
    if (diagnostics) parts.push(`diagnostics: ${oneLine(diagnostics)}`);
    // Fix: separate the path from the summary details — previously they
    // were concatenated with no delimiter, yielding "- `path`Title | …".
    return `- \`${relPath}\` — ${parts.join(" | ")}`;
  }));
  return ["## Carry-Forward Context", ...items].join("\n");
}
/**
 * Build a compact excerpt of a slice plan for execution prompts: the
 * "**Goal:**"/"**Demo:**" lines plus the Verification and
 * Observability / Diagnostics sections. When the plan content is missing,
 * returns an instruction to read the file instead.
 */
export function extractSliceExecutionExcerpt(content: string | null, relPath: string): string {
  if (!content) {
    return [
      "## Slice Plan Excerpt",
      `Slice plan not found at dispatch time. Read \`${relPath}\` before running slice-level verification.`,
    ].join("\n");
  }
  const out = ["## Slice Plan Excerpt", `Source: \`${relPath}\``];
  const planLines = content.split("\n");
  // Goal first, then Demo — order matters for the rendered excerpt.
  for (const prefix of ["**Goal:**", "**Demo:**"]) {
    const line = planLines.find(l => l.startsWith(prefix))?.trim();
    if (line) out.push(line);
  }
  const verification = extractMarkdownSection(content, "Verification");
  if (verification) {
    out.push("", "### Slice Verification", verification.trim());
  }
  const observability = extractMarkdownSection(content, "Observability / Diagnostics");
  if (observability) {
    out.push("", "### Slice Observability / Diagnostics", observability.trim());
  }
  return out.join("\n");
}
// ─── Prior Task Summaries ──────────────────────────────────────────────────
/**
 * List repo-relative paths of SUMMARY files for tasks that precede
 * `currentTid` in this slice, compared by numeric task ID (the digits
 * after the leading "T").
 */
export async function getPriorTaskSummaryPaths(
  mid: string, sid: string, currentTid: string, base: string,
): Promise<string[]> {
  const tasksDir = resolveTasksDir(base, mid, sid);
  if (!tasksDir) return [];
  const currentNum = parseInt(currentTid.replace(/^T/, ""), 10);
  const sliceRel = relSlicePath(base, mid, sid);
  const prior: string[] = [];
  for (const file of resolveTaskFiles(tasksDir, "SUMMARY")) {
    const num = parseInt(file.replace(/^T/, ""), 10);
    if (num < currentNum) prior.push(`${sliceRel}/tasks/${file}`);
  }
  return prior;
}
// ─── Adaptive Replanning Checks ────────────────────────────────────────────
/**
 * Check if the most recently completed slice needs reassessment.
 * Returns { sliceId } if reassessment is needed, null otherwise.
 *
 * Skips reassessment when:
 * - No roadmap exists yet
 * - No slices are completed
 * - The last completed slice already has an assessment file
 * - All slices are complete (milestone done — no point reassessing)
 * - The last completed slice has no summary to reassess against
 */
export async function checkNeedsReassessment(
  base: string, mid: string, state: GSDState,
): Promise<{ sliceId: string } | null> {
  const roadmapFile = resolveMilestoneFile(base, mid, "ROADMAP");
  const roadmapContent = roadmapFile ? await loadFile(roadmapFile) : null;
  if (!roadmapContent) return null;
  const slices = parseRoadmap(roadmapContent).slices;
  const completed = slices.filter(s => s.done);
  // Nothing finished yet, or everything finished — no reassessment target.
  if (completed.length === 0 || completed.length === slices.length) return null;
  const lastId = completed[completed.length - 1].id;
  // Already assessed — idempotent skip.
  const assessmentFile = resolveSliceFile(base, mid, lastId, "ASSESSMENT");
  if (assessmentFile && await loadFile(assessmentFile)) return null;
  // A summary is required to reassess against.
  const summaryFile = resolveSliceFile(base, mid, lastId, "SUMMARY");
  if (!summaryFile || !(await loadFile(summaryFile))) return null;
  return { sliceId: lastId };
}
/**
 * Check if the most recently completed slice needs a UAT run.
 * Returns { sliceId, uatType } if UAT should be dispatched, null otherwise.
 *
 * Skips when:
 * - No roadmap or no completed slices
 * - All slices are done (milestone-complete path — reassessment handles it)
 * - The uat_dispatch preference is not enabled (opt-in feature)
 * - No UAT file (or empty UAT file) exists for the slice
 * - A UAT result file already exists (idempotent — already ran)
 */
export async function checkNeedsRunUat(
  base: string, mid: string, state: GSDState, prefs: GSDPreferences | undefined,
): Promise<{ sliceId: string; uatType: UatType } | null> {
  const roadmapFile = resolveMilestoneFile(base, mid, "ROADMAP");
  const roadmapContent = roadmapFile ? await loadFile(roadmapFile) : null;
  if (!roadmapContent) return null;
  const slices = parseRoadmap(roadmapContent).slices;
  const completed = slices.filter(s => s.done);
  if (completed.length === 0) return null;              // nothing to UAT yet
  if (completed.length === slices.length) return null;  // milestone-complete path
  if (!prefs?.uat_dispatch) return null;                // feature is opt-in
  const sid = completed[completed.length - 1].id;
  const uatFile = resolveSliceFile(base, mid, sid, "UAT");
  if (!uatFile) return null;
  const uatContent = await loadFile(uatFile);
  if (!uatContent) return null;
  // A non-empty result file means this UAT already ran.
  const uatResultFile = resolveSliceFile(base, mid, sid, "UAT-RESULT");
  if (uatResultFile && await loadFile(uatResultFile)) return null;
  // Unknown UAT type falls back to human review.
  const uatType = extractUatType(uatContent) ?? "human-experience";
  return { sliceId: sid, uatType };
}
// ─── Prompt Builders ──────────────────────────────────────────────────────
/**
 * Build the dispatch prompt for a "research-milestone" unit.
 *
 * Inlines the milestone context (always, with a placeholder if missing),
 * the optional .gsd/ root files (project, requirements, decisions,
 * knowledge), and the research template, then fills the
 * "research-milestone" prompt template including skill-discovery variables.
 */
export async function buildResearchMilestonePrompt(mid: string, midTitle: string, base: string): Promise<string> {
  const contextPath = resolveMilestoneFile(base, mid, "CONTEXT");
  const contextRel = relMilestoneFile(base, mid, "CONTEXT");
  const inlined: string[] = [];
  inlined.push(await inlineFile(contextPath, contextRel, "Milestone Context"));
  // Root files are omitted entirely when absent (inlineGsdRootFile → null).
  const projectInline = await inlineGsdRootFile(base, "project.md", "Project");
  if (projectInline) inlined.push(projectInline);
  const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
  if (requirementsInline) inlined.push(requirementsInline);
  const decisionsInline = await inlineGsdRootFile(base, "decisions.md", "Decisions");
  if (decisionsInline) inlined.push(decisionsInline);
  const knowledgeInlineRM = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
  if (knowledgeInlineRM) inlined.push(knowledgeInlineRM);
  inlined.push(inlineTemplate("research", "Research"));
  const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`;
  const outputRelPath = relMilestoneFile(base, mid, "RESEARCH");
  return loadPrompt("research-milestone", {
    workingDirectory: base,
    milestoneId: mid, milestoneTitle: midTitle,
    milestonePath: relMilestonePath(base, mid),
    contextPath: contextRel,
    outputPath: outputRelPath,
    inlinedContext,
    ...buildSkillDiscoveryVars(),
  });
}
/**
 * Build the dispatch prompt for a "plan-milestone" unit.
 *
 * Inlines context, optional research, the prior milestone summary, root
 * files (skipped at "minimal"), and an inline-level-dependent set of
 * templates before filling the "plan-milestone" template.
 *
 * @param level Optional inline-level override; defaults to the configured
 *              preference via resolveInlineLevel(). "full" additionally
 *              inlines the secrets-manifest template.
 */
export async function buildPlanMilestonePrompt(mid: string, midTitle: string, base: string, level?: InlineLevel): Promise<string> {
  const inlineLevel = level ?? resolveInlineLevel();
  const contextPath = resolveMilestoneFile(base, mid, "CONTEXT");
  const contextRel = relMilestoneFile(base, mid, "CONTEXT");
  const researchPath = resolveMilestoneFile(base, mid, "RESEARCH");
  const researchRel = relMilestoneFile(base, mid, "RESEARCH");
  const inlined: string[] = [];
  inlined.push(await inlineFile(contextPath, contextRel, "Milestone Context"));
  const researchInline = await inlineFileOptional(researchPath, researchRel, "Milestone Research");
  if (researchInline) inlined.push(researchInline);
  // Imported lazily rather than statically at module top — presumably to
  // avoid an import cycle with files.js; TODO confirm before hoisting.
  const { inlinePriorMilestoneSummary } = await import("./files.js");
  const priorSummaryInline = await inlinePriorMilestoneSummary(mid, base);
  if (priorSummaryInline) inlined.push(priorSummaryInline);
  // Root files are skipped at "minimal" to keep the prompt small.
  const projectInline = inlineLevel !== "minimal" ? await inlineGsdRootFile(base, "project.md", "Project") : null;
  if (projectInline) inlined.push(projectInline);
  const requirementsInline = inlineLevel !== "minimal" ? await inlineGsdRootFile(base, "requirements.md", "Requirements") : null;
  if (requirementsInline) inlined.push(requirementsInline);
  const decisionsInline = inlineLevel !== "minimal" ? await inlineGsdRootFile(base, "decisions.md", "Decisions") : null;
  if (decisionsInline) inlined.push(decisionsInline);
  // Knowledge is inlined at every level (when the file exists).
  const knowledgeInlinePM = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
  if (knowledgeInlinePM) inlined.push(knowledgeInlinePM);
  inlined.push(inlineTemplate("roadmap", "Roadmap"));
  if (inlineLevel === "full") {
    inlined.push(inlineTemplate("decisions", "Decisions"));
    inlined.push(inlineTemplate("plan", "Slice Plan"));
    inlined.push(inlineTemplate("task-plan", "Task Plan"));
    inlined.push(inlineTemplate("secrets-manifest", "Secrets Manifest"));
  } else if (inlineLevel === "standard") {
    inlined.push(inlineTemplate("decisions", "Decisions"));
    inlined.push(inlineTemplate("plan", "Slice Plan"));
    inlined.push(inlineTemplate("task-plan", "Task Plan"));
  }
  const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`;
  const outputRelPath = relMilestoneFile(base, mid, "ROADMAP");
  const secretsOutputPath = relMilestoneFile(base, mid, "SECRETS");
  return loadPrompt("plan-milestone", {
    workingDirectory: base,
    milestoneId: mid, milestoneTitle: midTitle,
    milestonePath: relMilestonePath(base, mid),
    contextPath: contextRel,
    researchPath: researchRel,
    outputPath: outputRelPath,
    secretsOutputPath,
    inlinedContext,
  });
}
/**
 * Build the dispatch prompt for a "research-slice" unit.
 *
 * Inlines the roadmap (always), optional milestone context/research and
 * root files, the research template, dependency slice summaries, and any
 * active overrides (unshifted so overrides appear first in the inlined
 * context). Includes skill-discovery variables.
 */
export async function buildResearchSlicePrompt(
  mid: string, _midTitle: string, sid: string, sTitle: string, base: string,
): Promise<string> {
  const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
  const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
  const contextPath = resolveMilestoneFile(base, mid, "CONTEXT");
  const contextRel = relMilestoneFile(base, mid, "CONTEXT");
  const milestoneResearchPath = resolveMilestoneFile(base, mid, "RESEARCH");
  const milestoneResearchRel = relMilestoneFile(base, mid, "RESEARCH");
  const inlined: string[] = [];
  inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
  const contextInline = await inlineFileOptional(contextPath, contextRel, "Milestone Context");
  if (contextInline) inlined.push(contextInline);
  const researchInline = await inlineFileOptional(milestoneResearchPath, milestoneResearchRel, "Milestone Research");
  if (researchInline) inlined.push(researchInline);
  const decisionsInline = await inlineGsdRootFile(base, "decisions.md", "Decisions");
  if (decisionsInline) inlined.push(decisionsInline);
  const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
  if (requirementsInline) inlined.push(requirementsInline);
  const knowledgeInlineRS = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
  if (knowledgeInlineRS) inlined.push(knowledgeInlineRS);
  inlined.push(inlineTemplate("research", "Research"));
  const depContent = await inlineDependencySummaries(mid, sid, base);
  // Active overrides go to the front of the inlined context.
  const activeOverrides = await loadActiveOverrides(base);
  const overridesInline = formatOverridesSection(activeOverrides);
  if (overridesInline) inlined.unshift(overridesInline);
  const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`;
  const outputRelPath = relSliceFile(base, mid, sid, "RESEARCH");
  return loadPrompt("research-slice", {
    workingDirectory: base,
    milestoneId: mid, sliceId: sid, sliceTitle: sTitle,
    slicePath: relSlicePath(base, mid, sid),
    roadmapPath: roadmapRel,
    contextPath: contextRel,
    milestoneResearchPath: milestoneResearchRel,
    outputPath: outputRelPath,
    inlinedContext,
    dependencySummaries: depContent,
    ...buildSkillDiscoveryVars(),
  });
}
/**
 * Build the dispatch prompt for a "plan-slice" unit.
 *
 * Inlines the roadmap (always), optional slice research, root files
 * (skipped at "minimal"), templates by inline level, dependency slice
 * summaries, and active overrides (placed first in the inlined context).
 *
 * @param level Optional inline-level override; defaults to the configured
 *              preference via resolveInlineLevel().
 */
export async function buildPlanSlicePrompt(
  mid: string, _midTitle: string, sid: string, sTitle: string, base: string, level?: InlineLevel,
): Promise<string> {
  const inlineLevel = level ?? resolveInlineLevel();
  const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
  const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
  const researchPath = resolveSliceFile(base, mid, sid, "RESEARCH");
  const researchRel = relSliceFile(base, mid, sid, "RESEARCH");
  const inlined: string[] = [];
  inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
  const researchInline = await inlineFileOptional(researchPath, researchRel, "Slice Research");
  if (researchInline) inlined.push(researchInline);
  if (inlineLevel !== "minimal") {
    const decisionsInline = await inlineGsdRootFile(base, "decisions.md", "Decisions");
    if (decisionsInline) inlined.push(decisionsInline);
    const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
    if (requirementsInline) inlined.push(requirementsInline);
  }
  // Knowledge is inlined at every level (when the file exists).
  const knowledgeInlinePS = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
  if (knowledgeInlinePS) inlined.push(knowledgeInlinePS);
  inlined.push(inlineTemplate("plan", "Slice Plan"));
  if (inlineLevel === "full") {
    inlined.push(inlineTemplate("task-plan", "Task Plan"));
  }
  const depContent = await inlineDependencySummaries(mid, sid, base);
  // Active overrides go to the front of the inlined context.
  const planActiveOverrides = await loadActiveOverrides(base);
  const planOverridesInline = formatOverridesSection(planActiveOverrides);
  if (planOverridesInline) inlined.unshift(planOverridesInline);
  const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`;
  const outputRelPath = relSliceFile(base, mid, sid, "PLAN");
  return loadPrompt("plan-slice", {
    workingDirectory: base,
    milestoneId: mid, sliceId: sid, sliceTitle: sTitle,
    slicePath: relSlicePath(base, mid, sid),
    roadmapPath: roadmapRel,
    researchPath: researchRel,
    outputPath: outputRelPath,
    inlinedContext,
    dependencySummaries: depContent,
  });
}
/**
 * Build the dispatch prompt for an "execute-task" unit.
 *
 * Assembles, in order: prior-task summary paths, the inlined task plan
 * (with a read-instruction fallback when missing), a slice-plan excerpt,
 * a resume section from any continue file (new naming with legacy
 * continue.md fallback), a carry-forward section built from prior task
 * summaries, inline-level-dependent templates, and active overrides.
 *
 * @param level Optional inline-level override; defaults to the configured
 *              preference. At "minimal", only the most recent prior summary
 *              is carried forward and only the task-summary template is inlined.
 */
export async function buildExecuteTaskPrompt(
  mid: string, sid: string, sTitle: string,
  tid: string, tTitle: string, base: string, level?: InlineLevel,
): Promise<string> {
  const inlineLevel = level ?? resolveInlineLevel();
  const priorSummaries = await getPriorTaskSummaryPaths(mid, sid, tid, base);
  const priorLines = priorSummaries.length > 0
    ? priorSummaries.map(p => `- \`${p}\``).join("\n")
    : "- (no prior tasks)";
  const taskPlanPath = resolveTaskFile(base, mid, sid, tid, "PLAN");
  const taskPlanContent = taskPlanPath ? await loadFile(taskPlanPath) : null;
  const taskPlanRelPath = relSlicePath(base, mid, sid) + `/tasks/${tid}-PLAN.md`;
  // Missing plan content degrades to a read-instruction rather than failing.
  const taskPlanInline = taskPlanContent
    ? [
      "## Inlined Task Plan (authoritative local execution contract)",
      `Source: \`${taskPlanRelPath}\``,
      "",
      taskPlanContent.trim(),
    ].join("\n")
    : [
      "## Inlined Task Plan (authoritative local execution contract)",
      `Task plan not found at dispatch time. Read \`${taskPlanRelPath}\` before executing.`,
    ].join("\n")
  const slicePlanPath = resolveSliceFile(base, mid, sid, "PLAN");
  const slicePlanContent = slicePlanPath ? await loadFile(slicePlanPath) : null;
  const slicePlanExcerpt = extractSliceExecutionExcerpt(slicePlanContent, relSliceFile(base, mid, sid, "PLAN"));
  // Check for continue file (new naming or legacy); the legacy continue.md
  // is only read when the new-style file yields no content.
  const continueFile = resolveSliceFile(base, mid, sid, "CONTINUE");
  const legacyContinueDir = resolveSlicePath(base, mid, sid);
  const legacyContinuePath = legacyContinueDir ? join(legacyContinueDir, "continue.md") : null;
  const continueContent = continueFile ? await loadFile(continueFile) : null;
  const legacyContinueContent = !continueContent && legacyContinuePath ? await loadFile(legacyContinuePath) : null;
  const continueRelPath = relSliceFile(base, mid, sid, "CONTINUE");
  const resumeSection = buildResumeSection(
    continueContent,
    legacyContinueContent,
    continueRelPath,
    legacyContinuePath ? `${relSlicePath(base, mid, sid)}/continue.md` : null,
  );
  // For minimal inline level, only carry forward the most recent prior summary
  const effectivePriorSummaries = inlineLevel === "minimal" && priorSummaries.length > 1
    ? priorSummaries.slice(-1)
    : priorSummaries;
  const carryForwardSection = await buildCarryForwardSection(effectivePriorSummaries, base);
  // Inline project knowledge if available
  // (note: loaded even at "minimal", where the branch below discards it).
  const knowledgeInlineET = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
  const inlinedTemplates = inlineLevel === "minimal"
    ? inlineTemplate("task-summary", "Task Summary")
    : [
      inlineTemplate("task-summary", "Task Summary"),
      inlineTemplate("decisions", "Decisions"),
      ...(knowledgeInlineET ? [knowledgeInlineET] : []),
    ].join("\n\n---\n\n");
  const taskSummaryPath = `${relSlicePath(base, mid, sid)}/tasks/${tid}-SUMMARY.md`;
  const activeOverrides = await loadActiveOverrides(base);
  const overridesSection = formatOverridesSection(activeOverrides);
  return loadPrompt("execute-task", {
    overridesSection,
    workingDirectory: base,
    milestoneId: mid, sliceId: sid, sliceTitle: sTitle, taskId: tid, taskTitle: tTitle,
    planPath: relSliceFile(base, mid, sid, "PLAN"),
    slicePath: relSlicePath(base, mid, sid),
    taskPlanPath: taskPlanRelPath,
    taskPlanInline,
    slicePlanExcerpt,
    carryForwardSection,
    resumeSection,
    priorTaskLines: priorLines,
    taskSummaryPath,
    inlinedTemplates,
  });
}
/**
 * Build the dispatch prompt for a "complete-slice" unit.
 *
 * Inlines the roadmap and slice plan (always, with placeholders when
 * missing), optional requirements/knowledge, every task summary found for
 * the slice (sorted by filename), the slice-summary template, the UAT
 * template (non-minimal only), and active overrides (placed first).
 *
 * @param level Optional inline-level override; defaults to the configured
 *              preference via resolveInlineLevel().
 */
export async function buildCompleteSlicePrompt(
  mid: string, _midTitle: string, sid: string, sTitle: string, base: string, level?: InlineLevel,
): Promise<string> {
  const inlineLevel = level ?? resolveInlineLevel();
  const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
  const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
  const slicePlanPath = resolveSliceFile(base, mid, sid, "PLAN");
  const slicePlanRel = relSliceFile(base, mid, sid, "PLAN");
  const inlined: string[] = [];
  inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
  inlined.push(await inlineFile(slicePlanPath, slicePlanRel, "Slice Plan"));
  if (inlineLevel !== "minimal") {
    const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
    if (requirementsInline) inlined.push(requirementsInline);
  }
  const knowledgeInlineCS = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
  if (knowledgeInlineCS) inlined.push(knowledgeInlineCS);
  // Inline all task summaries for this slice
  const tDir = resolveTasksDir(base, mid, sid);
  if (tDir) {
    const summaryFiles = resolveTaskFiles(tDir, "SUMMARY").sort();
    for (const file of summaryFiles) {
      const absPath = join(tDir, file);
      const content = await loadFile(absPath);
      const sRel = relSlicePath(base, mid, sid);
      const relPath = `${sRel}/tasks/${file}`;
      // Missing summaries are silently skipped (only existing content is inlined).
      if (content) {
        inlined.push(`### Task Summary: ${file.replace(/-SUMMARY\.md$/i, "")}\nSource: \`${relPath}\`\n\n${content.trim()}`);
      }
    }
  }
  inlined.push(inlineTemplate("slice-summary", "Slice Summary"));
  if (inlineLevel !== "minimal") {
    inlined.push(inlineTemplate("uat", "UAT"));
  }
  // Active overrides go to the front of the inlined context.
  const completeActiveOverrides = await loadActiveOverrides(base);
  const completeOverridesInline = formatOverridesSection(completeActiveOverrides);
  if (completeOverridesInline) inlined.unshift(completeOverridesInline);
  const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`;
  const sliceRel = relSlicePath(base, mid, sid);
  const sliceSummaryPath = `${sliceRel}/${sid}-SUMMARY.md`;
  const sliceUatPath = `${sliceRel}/${sid}-UAT.md`;
  return loadPrompt("complete-slice", {
    workingDirectory: base,
    milestoneId: mid, sliceId: sid, sliceTitle: sTitle,
    slicePath: sliceRel,
    roadmapPath: roadmapRel,
    inlinedContext,
    sliceSummaryPath,
    sliceUatPath,
  });
}
/**
 * Build the complete-milestone prompt for `mid`.
 *
 * Inlines, in order: the milestone roadmap, every slice summary listed in the
 * roadmap (deduplicated by slice ID), the root GSD docs (skipped at "minimal"
 * inline level), knowledge.md, the optional milestone CONTEXT file, and the
 * milestone-summary template. The result is rendered through the
 * "complete-milestone" prompt template.
 *
 * @param mid      Milestone ID (used to resolve milestone/slice files).
 * @param midTitle Human-readable milestone title, passed through to the template.
 * @param base     Project base path.
 * @param level    Optional inline-level override; defaults to resolveInlineLevel().
 * @returns The rendered prompt text.
 */
export async function buildCompleteMilestonePrompt(
mid: string, midTitle: string, base: string, level?: InlineLevel,
): Promise<string> {
const inlineLevel = level ?? resolveInlineLevel();
const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
const inlined: string[] = [];
inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
// Inline all slice summaries (deduplicated by slice ID)
const roadmapContent = roadmapPath ? await loadFile(roadmapPath) : null;
if (roadmapContent) {
const roadmap = parseRoadmap(roadmapContent);
const seenSlices = new Set<string>();
for (const slice of roadmap.slices) {
if (seenSlices.has(slice.id)) continue;
seenSlices.add(slice.id);
const summaryPath = resolveSliceFile(base, mid, slice.id, "SUMMARY");
const summaryRel = relSliceFile(base, mid, slice.id, "SUMMARY");
inlined.push(await inlineFile(summaryPath, summaryRel, `${slice.id} Summary`));
}
}
// Inline root GSD files (skip for minimal — completion can read these if needed)
if (inlineLevel !== "minimal") {
const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
if (requirementsInline) inlined.push(requirementsInline);
const decisionsInline = await inlineGsdRootFile(base, "decisions.md", "Decisions");
if (decisionsInline) inlined.push(decisionsInline);
const projectInline = await inlineGsdRootFile(base, "project.md", "Project");
if (projectInline) inlined.push(projectInline);
}
// knowledge.md is inlined at every level, unlike the other root docs above
const knowledgeInlineCM = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
if (knowledgeInlineCM) inlined.push(knowledgeInlineCM);
// Inline milestone context file (milestone-level, not GSD root)
const contextPath = resolveMilestoneFile(base, mid, "CONTEXT");
const contextRel = relMilestoneFile(base, mid, "CONTEXT");
const contextInline = await inlineFileOptional(contextPath, contextRel, "Milestone Context");
if (contextInline) inlined.push(contextInline);
inlined.push(inlineTemplate("milestone-summary", "Milestone Summary"));
const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`;
const milestoneSummaryPath = `${relMilestonePath(base, mid)}/${mid}-SUMMARY.md`;
return loadPrompt("complete-milestone", {
workingDirectory: base,
milestoneId: mid,
milestoneTitle: midTitle,
roadmapPath: roadmapRel,
inlinedContext,
milestoneSummaryPath,
});
}
/**
 * Build the replan-slice prompt for slice `sid` of milestone `mid`.
 *
 * Inlines the roadmap, the current slice plan, the summary of any completed
 * task whose frontmatter has `blocker_discovered: true` (the trigger for a
 * replan), decisions.md, and — at the very front — any active overrides.
 * Renders the "replan-slice" prompt template.
 *
 * NOTE(review): `midTitle` is accepted but never referenced in this body —
 * presumably kept for signature parity with the sibling builders; confirm
 * before removing.
 *
 * @param mid      Milestone ID.
 * @param midTitle Milestone title (currently unused — see note above).
 * @param sid      Slice ID.
 * @param sTitle   Slice title, passed through to the template.
 * @param base     Project base path.
 * @returns The rendered prompt text.
 */
export async function buildReplanSlicePrompt(
mid: string, midTitle: string, sid: string, sTitle: string, base: string,
): Promise<string> {
const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
const slicePlanPath = resolveSliceFile(base, mid, sid, "PLAN");
const slicePlanRel = relSliceFile(base, mid, sid, "PLAN");
const inlined: string[] = [];
inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
inlined.push(await inlineFile(slicePlanPath, slicePlanRel, "Current Slice Plan"));
// Find the blocker task summary — the completed task with blocker_discovered: true
let blockerTaskId = "";
const tDir = resolveTasksDir(base, mid, sid);
if (tDir) {
const summaryFiles = resolveTaskFiles(tDir, "SUMMARY").sort();
for (const file of summaryFiles) {
const absPath = join(tDir, file);
const content = await loadFile(absPath);
if (!content) continue;
const summary = parseSummary(content);
const sRel = relSlicePath(base, mid, sid);
const relPath = `${sRel}/tasks/${file}`;
if (summary.frontmatter.blocker_discovered) {
// Last matching summary wins if several tasks flag a blocker
blockerTaskId = summary.frontmatter.id || file.replace(/-SUMMARY\.md$/i, "");
inlined.push(`### Blocker Task Summary: ${blockerTaskId}\nSource: \`${relPath}\`\n\n${content.trim()}`);
}
}
}
// Inline decisions
const decisionsInline = await inlineGsdRootFile(base, "decisions.md", "Decisions");
if (decisionsInline) inlined.push(decisionsInline);
// Active overrides go to the FRONT of the inlined context (unshift)
const replanActiveOverrides = await loadActiveOverrides(base);
const replanOverridesInline = formatOverridesSection(replanActiveOverrides);
if (replanOverridesInline) inlined.unshift(replanOverridesInline);
const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`;
const replanPath = `${relSlicePath(base, mid, sid)}/${sid}-REPLAN.md`;
return loadPrompt("replan-slice", {
workingDirectory: base,
milestoneId: mid,
sliceId: sid,
sliceTitle: sTitle,
slicePath: relSlicePath(base, mid, sid),
planPath: slicePlanRel,
blockerTaskId,
inlinedContext,
replanPath,
});
}
/**
 * Build the run-uat prompt for slice `sliceId` of milestone `mid`.
 *
 * Inlines the UAT file, the slice summary (when one exists), and project.md,
 * then renders the "run-uat" template. The UAT type is extracted from the
 * UAT content, defaulting to "human-experience" when absent.
 *
 * @param mid        Milestone ID.
 * @param sliceId    Slice ID.
 * @param uatPath    Relative path to the UAT file (passed through to the template).
 * @param uatContent Raw UAT file content (used only to extract the UAT type).
 * @param base       Project base path.
 * @returns The rendered prompt text.
 */
export async function buildRunUatPrompt(
  mid: string, sliceId: string, uatPath: string, uatContent: string, base: string,
): Promise<string> {
  const sections: string[] = [
    await inlineFile(resolveSliceFile(base, mid, sliceId, "UAT"), uatPath, `${sliceId} UAT`),
  ];
  const summaryAbs = resolveSliceFile(base, mid, sliceId, "SUMMARY");
  const summaryRel = relSliceFile(base, mid, sliceId, "SUMMARY");
  if (summaryAbs) {
    const summarySection = await inlineFileOptional(summaryAbs, summaryRel, `${sliceId} Summary`);
    if (summarySection) sections.push(summarySection);
  }
  const projectSection = await inlineGsdRootFile(base, "project.md", "Project");
  if (projectSection) sections.push(projectSection);
  const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${sections.join("\n\n---\n\n")}`;
  return loadPrompt("run-uat", {
    workingDirectory: base,
    milestoneId: mid,
    sliceId,
    uatPath,
    uatResultPath: relSliceFile(base, mid, sliceId, "UAT-RESULT"),
    uatType: extractUatType(uatContent) ?? "human-experience",
    inlinedContext,
  });
}
/**
 * Build the reassess-roadmap prompt after slice `completedSliceId` finishes.
 *
 * Inlines the current roadmap and the completed slice's summary; at inline
 * levels other than "minimal" also inlines project.md, requirements.md, and
 * decisions.md. knowledge.md is inlined at every level. Renders the
 * "reassess-roadmap" template.
 *
 * @param mid              Milestone ID.
 * @param midTitle         Milestone title, passed through to the template.
 * @param completedSliceId ID of the slice that just completed.
 * @param base             Project base path.
 * @param level            Optional inline-level override; defaults to resolveInlineLevel().
 * @returns The rendered prompt text.
 */
export async function buildReassessRoadmapPrompt(
  mid: string, midTitle: string, completedSliceId: string, base: string, level?: InlineLevel,
): Promise<string> {
  const effectiveLevel = level ?? resolveInlineLevel();
  const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
  const summaryRel = relSliceFile(base, mid, completedSliceId, "SUMMARY");
  const sections: string[] = [
    await inlineFile(resolveMilestoneFile(base, mid, "ROADMAP"), roadmapRel, "Current Roadmap"),
    await inlineFile(resolveSliceFile(base, mid, completedSliceId, "SUMMARY"), summaryRel, `${completedSliceId} Summary`),
  ];
  if (effectiveLevel !== "minimal") {
    // Root GSD docs are skipped at "minimal" — the agent can read them on demand.
    for (const [fileName, title] of [
      ["project.md", "Project"],
      ["requirements.md", "Requirements"],
      ["decisions.md", "Decisions"],
    ] as const) {
      const section = await inlineGsdRootFile(base, fileName, title);
      if (section) sections.push(section);
    }
  }
  const knowledgeSection = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
  if (knowledgeSection) sections.push(knowledgeSection);
  const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${sections.join("\n\n---\n\n")}`;
  return loadPrompt("reassess-roadmap", {
    milestoneId: mid,
    milestoneTitle: midTitle,
    completedSliceId,
    roadmapPath: roadmapRel,
    completedSliceSummaryPath: summaryRel,
    assessmentPath: relSliceFile(base, mid, completedSliceId, "ASSESSMENT"),
    inlinedContext,
  });
}
/**
 * Build the rewrite-docs prompt used to fold active overrides into the
 * planning documents.
 *
 * Collects a bullet list of the documents eligible for rewriting: the active
 * slice's plan and the plans of its not-yet-done tasks, then any of
 * DECISIONS/REQUIREMENTS/PROJECT that exist on disk, the milestone CONTEXT
 * file (reference only), and the roadmap. Each override is formatted as a
 * numbered section. Renders the "rewrite-docs" template.
 *
 * @param mid         Milestone ID.
 * @param midTitle    Milestone title, passed through to the template.
 * @param activeSlice The currently active slice, or null when none is active
 *                    (slice/task plans are then omitted from the doc list).
 * @param base        Project base path.
 * @param overrides   Overrides to present to the rewriter.
 * @returns The rendered prompt text.
 */
export async function buildRewriteDocsPrompt(
mid: string, midTitle: string,
activeSlice: { id: string; title: string } | null,
base: string,
overrides: Override[],
): Promise<string> {
const sid = activeSlice?.id;
const sTitle = activeSlice?.title ?? "";
const docList: string[] = [];
if (sid) {
const slicePlanPath = resolveSliceFile(base, mid, sid, "PLAN");
const slicePlanRel = relSliceFile(base, mid, sid, "PLAN");
if (slicePlanPath) {
docList.push(`- Slice plan: \`${slicePlanRel}\``);
const tDir = resolveTasksDir(base, mid, sid);
if (tDir) {
const planContent = await loadFile(slicePlanPath);
if (planContent) {
const plan = parsePlan(planContent);
// Only plans of tasks that are NOT done are rewrite candidates
for (const task of plan.tasks) {
if (!task.done) {
const taskPlanPath = resolveTaskFile(base, mid, sid, task.id, "PLAN");
if (taskPlanPath) {
const taskRelPath = `${relSlicePath(base, mid, sid)}/tasks/${task.id}-PLAN.md`;
docList.push(`- Task plan: \`${taskRelPath}\``);
}
}
}
}
}
}
}
// Root GSD docs are listed only if they actually exist on disk
const decisionsPath = resolveGsdRootFile(base, "DECISIONS");
if (existsSync(decisionsPath)) docList.push(`- Decisions: \`${relGsdRootFile("DECISIONS")}\``);
const requirementsPath = resolveGsdRootFile(base, "REQUIREMENTS");
if (existsSync(requirementsPath)) docList.push(`- Requirements: \`${relGsdRootFile("REQUIREMENTS")}\``);
const projectPath = resolveGsdRootFile(base, "PROJECT");
if (existsSync(projectPath)) docList.push(`- Project: \`${relGsdRootFile("PROJECT")}\``);
const contextPath = resolveMilestoneFile(base, mid, "CONTEXT");
const contextRel = relMilestoneFile(base, mid, "CONTEXT");
if (contextPath) docList.push(`- Milestone context (reference only): \`${contextRel}\``);
const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
if (roadmapPath) docList.push(`- Roadmap: \`${roadmapRel}\``);
// One "### Override N" section per override, joined by blank lines
const overrideContent = overrides.map((o, i) => [
`### Override ${i + 1}`,
`**Change:** ${o.change}`,
`**Issued:** ${o.timestamp}`,
`**During:** ${o.appliedAt}`,
].join("\n")).join("\n\n");
const documentList = docList.length > 0 ? docList.join("\n") : "- No active plan documents found.";
return loadPrompt("rewrite-docs", {
milestoneId: mid,
milestoneTitle: midTitle,
sliceId: sid ?? "none",
sliceTitle: sTitle,
overrideContent,
documentList,
overridesPath: relGsdRootFile("OVERRIDES"),
});
}

View file

@ -0,0 +1,534 @@
/**
 * Auto-mode Recovery — artifact resolution, verification, blocker placeholders,
 * skip artifacts, completed-unit persistence, merge state reconciliation,
 * self-heal runtime records, and loop remediation steps.
 *
 * Pure functions that receive all needed state as parameters — no module-level
 * globals or AutoContext dependency.
 */
import type { ExtensionContext } from "@gsd/pi-coding-agent";
import {
clearUnitRuntimeRecord,
} from "./unit-runtime.js";
import { clearParseCache } from "./files.js";
import {
nativeConflictFiles,
nativeCommit,
nativeCheckoutTheirs,
nativeAddPaths,
nativeMergeAbort,
nativeResetHard,
} from "./native-git-bridge.js";
import {
resolveMilestonePath,
resolveSlicePath,
resolveSliceFile,
resolveTasksDir,
relMilestoneFile,
relSliceFile,
relSlicePath,
relTaskFile,
buildMilestoneFileName,
buildSliceFileName,
buildTaskFileName,
resolveMilestoneFile,
clearPathCache,
resolveGsdRootFile,
} from "./paths.js";
import { parseRoadmap } from "./files.js";
import { existsSync, mkdirSync, readFileSync, writeFileSync, unlinkSync, renameSync } from "node:fs";
import { dirname, join } from "node:path";
// ─── Artifact Resolution & Verification ───────────────────────────────────────
/**
 * Resolve the expected artifact for a unit to an absolute path.
 *
 * @param unitType Unit kind (e.g. "plan-slice", "execute-task").
 * @param unitId   Slash-joined identifier: milestone[/slice[/task]].
 * @param base     Project base path.
 * @returns Absolute artifact path, or null when the unit type has no single
 *   artifact (rewrite-docs, unknown types) or the owning directory cannot
 *   be resolved.
 */
export function resolveExpectedArtifactPath(unitType: string, unitId: string, base: string): string | null {
  const segments = unitId.split("/");
  const milestoneId = segments[0]!;
  const sliceId = segments[1];
  switch (unitType) {
    // Milestone-scoped artifacts live directly in the milestone directory.
    case "research-milestone":
    case "plan-milestone":
    case "complete-milestone": {
      const dir = resolveMilestonePath(base, milestoneId);
      if (!dir) return null;
      const kind =
        unitType === "research-milestone" ? "RESEARCH" :
        unitType === "plan-milestone" ? "ROADMAP" : "SUMMARY";
      return join(dir, buildMilestoneFileName(milestoneId, kind));
    }
    // Slice-scoped artifacts live directly in the slice directory.
    case "research-slice":
    case "plan-slice":
    case "reassess-roadmap":
    case "run-uat":
    case "complete-slice": {
      const dir = resolveSlicePath(base, milestoneId, sliceId!);
      if (!dir) return null;
      const kind =
        unitType === "research-slice" ? "RESEARCH" :
        unitType === "plan-slice" ? "PLAN" :
        unitType === "reassess-roadmap" ? "ASSESSMENT" :
        unitType === "run-uat" ? "UAT-RESULT" : "SUMMARY";
      return join(dir, buildSliceFileName(sliceId!, kind));
    }
    // Task summaries live under the slice's tasks/ subdirectory.
    case "execute-task": {
      const taskId = segments[2];
      const dir = resolveSlicePath(base, milestoneId, sliceId!);
      return dir && taskId ? join(dir, "tasks", buildTaskFileName(taskId, "SUMMARY")) : null;
    }
    // rewrite-docs (and any unknown type) has no single verifiable artifact.
    default:
      return null;
  }
}
/**
 * Check whether the expected artifact(s) for a unit exist on disk.
 * Returns true if all required artifacts exist, or if the unit type has no
 * single verifiable artifact (e.g., replan-slice).
 *
 * complete-slice requires both SUMMARY and UAT files — verifying only
 * the summary allowed the unit to be marked complete when the LLM
 * skipped writing the UAT file (see #176).
 *
 * @param unitType Unit kind (e.g. "execute-task", "complete-slice", "hook/...").
 * @param unitId   Slash-joined identifier: milestone[/slice[/task]].
 * @param base     Project base path.
 * @returns true when every artifact required to consider the unit complete
 *   exists (and, where applicable, the plan/roadmap checkboxes agree).
 */
export function verifyExpectedArtifact(unitType: string, unitId: string, base: string): boolean {
// Hook units have no standard artifact — always pass. Their lifecycle
// is managed by the hook engine, not the artifact verification system.
if (unitType.startsWith("hook/")) return true;
// Clear stale directory listing cache AND parse cache so artifact checks see
// fresh disk state (#431). The parse cache must also be cleared because
// cacheKey() uses length + first/last 100 chars — when a checkbox changes
// from [ ] to [x], the key collides with the pre-edit version, returning
// stale parsed results (e.g., slice.done = false when it's actually true).
clearPathCache();
clearParseCache();
// rewrite-docs is verified by content, not by an artifact path: it passes
// once no override in OVERRIDES.md is still marked "**Scope:** active".
if (unitType === "rewrite-docs") {
const overridesPath = resolveGsdRootFile(base, "OVERRIDES");
if (!existsSync(overridesPath)) return true;
const content = readFileSync(overridesPath, "utf-8");
return !content.includes("**Scope:** active");
}
const absPath = resolveExpectedArtifactPath(unitType, unitId, base);
// Unit types with no verifiable artifact always pass (e.g. replan-slice).
// For all other types, null means the parent directory is missing on disk
// — treat as stale completion state so the key gets evicted (#313).
if (!absPath) return unitType === "replan-slice";
if (!existsSync(absPath)) return false;
// execute-task must also have its checkbox marked [x] in the slice plan
if (unitType === "execute-task") {
const parts = unitId.split("/");
const mid = parts[0];
const sid = parts[1];
const tid = parts[2];
if (mid && sid && tid) {
const planAbs = resolveSliceFile(base, mid, sid, "PLAN");
if (planAbs && existsSync(planAbs)) {
const planContent = readFileSync(planAbs, "utf-8");
// Escape regex metacharacters in the task ID before embedding it
const escapedTid = tid.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
// Matches a checked checkbox line: "- [x] **<tid>:" (case-insensitive x)
const re = new RegExp(`^- \\[[xX]\\] \\*\\*${escapedTid}:`, "m");
if (!re.test(planContent)) return false;
}
}
}
// complete-slice must also produce a UAT file AND mark the slice [x] in the roadmap.
// Without the roadmap check, a crash after writing SUMMARY+UAT but before updating
// the roadmap causes an infinite skip loop: the idempotency key says "done" but the
// state machine keeps returning the same complete-slice unit (roadmap still shows
// the slice incomplete), so dispatchNextUnit recurses forever.
if (unitType === "complete-slice") {
const parts = unitId.split("/");
const mid = parts[0];
const sid = parts[1];
if (mid && sid) {
const dir = resolveSlicePath(base, mid, sid);
if (dir) {
const uatPath = join(dir, buildSliceFileName(sid, "UAT"));
if (!existsSync(uatPath)) return false;
}
// Verify the roadmap has the slice marked [x]. If not, the completion
// record is stale — the unit must re-run to update the roadmap.
const roadmapFile = resolveMilestoneFile(base, mid, "ROADMAP");
if (roadmapFile && existsSync(roadmapFile)) {
try {
const roadmapContent = readFileSync(roadmapFile, "utf-8");
const roadmap = parseRoadmap(roadmapContent);
const slice = roadmap.slices.find(s => s.id === sid);
if (slice && !slice.done) return false;
} catch {
// Corrupt/unparseable roadmap — fail verification so the unit
// re-runs and has a chance to fix the roadmap. Silently passing
// here could advance past an incomplete slice.
return false;
}
}
}
}
return true;
}
/**
 * Write a placeholder artifact so the pipeline can advance past a stuck unit.
 *
 * NOTE: despite the original name of this contract, the return value is NOT
 * the relative path written — it is the human-readable artifact description
 * from diagnoseExpectedArtifact (suitable for user-facing messaging).
 *
 * @param unitType Unit kind whose artifact should be stubbed out.
 * @param unitId   Slash-joined identifier: milestone[/slice[/task]].
 * @param base     Project base path.
 * @param reason   Why recovery gave up; embedded verbatim in the placeholder body.
 * @returns Description of the expected artifact, or null if the artifact path
 *   couldn't be resolved (in which case nothing is written).
 */
export function writeBlockerPlaceholder(unitType: string, unitId: string, base: string, reason: string): string | null {
const absPath = resolveExpectedArtifactPath(unitType, unitId, base);
if (!absPath) return null;
// Ensure the parent directory exists before writing the placeholder
const dir = dirname(absPath);
if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
const content = [
`# BLOCKER — auto-mode recovery failed`,
``,
`Unit \`${unitType}\` for \`${unitId}\` failed to produce this artifact after idle recovery exhausted all retries.`,
``,
`**Reason**: ${reason}`,
``,
`This placeholder was written by auto-mode so the pipeline can advance.`,
`Review and replace this file before relying on downstream artifacts.`,
].join("\n");
writeFileSync(absPath, content, "utf-8");
return diagnoseExpectedArtifact(unitType, unitId, base);
}
/**
 * Describe, in human-readable form, the artifact(s) a given unit is expected
 * to produce — used in recovery and remediation messaging.
 *
 * @param unitType Unit kind (e.g. "plan-slice", "execute-task").
 * @param unitId   Slash-joined identifier: milestone[/slice[/task]].
 * @param base     Project base path.
 * @returns Description string, or null for unknown unit types.
 */
export function diagnoseExpectedArtifact(unitType: string, unitId: string, base: string): string | null {
  const [milestoneId, sliceId, taskId] = unitId.split("/");
  switch (unitType) {
    case "research-milestone":
      return `${relMilestoneFile(base, milestoneId!, "RESEARCH")} (milestone research)`;
    case "plan-milestone":
      return `${relMilestoneFile(base, milestoneId!, "ROADMAP")} (milestone roadmap)`;
    case "research-slice":
      return `${relSliceFile(base, milestoneId!, sliceId!, "RESEARCH")} (slice research)`;
    case "plan-slice":
      return `${relSliceFile(base, milestoneId!, sliceId!, "PLAN")} (slice plan)`;
    case "execute-task":
      return `Task ${taskId} marked [x] in ${relSliceFile(base, milestoneId!, sliceId!, "PLAN")} + summary written`;
    case "complete-slice":
      return `Slice ${sliceId} marked [x] in ${relMilestoneFile(base, milestoneId!, "ROADMAP")} + summary + UAT written`;
    case "replan-slice":
      return `${relSliceFile(base, milestoneId!, sliceId!, "REPLAN")} + updated ${relSliceFile(base, milestoneId!, sliceId!, "PLAN")}`;
    case "rewrite-docs":
      return "Active overrides resolved in .gsd/OVERRIDES.md + plan documents updated";
    case "reassess-roadmap":
      return `${relSliceFile(base, milestoneId!, sliceId!, "ASSESSMENT")} (roadmap reassessment)`;
    case "run-uat":
      return `${relSliceFile(base, milestoneId!, sliceId!, "UAT-RESULT")} (UAT result)`;
    case "complete-milestone":
      return `${relMilestoneFile(base, milestoneId!, "SUMMARY")} (milestone summary)`;
    default:
      return null;
  }
}
// ─── Skip / Blocker Artifact Generation ───────────────────────────────────────
/**
 * Write skip artifacts for a stuck execute-task: a blocker task summary and
 * the [x] checkbox in the slice plan.
 *
 * NOTE(review): when the slice plan file is missing or unresolvable, the
 * checkbox step is silently skipped and the function still returns true —
 * confirm that callers treat "summary written, plan untouched" as success.
 * Also note the summary may already be on disk when the checkbox update
 * fails (returns false) — the write is not transactional.
 *
 * @param base        Project base path.
 * @param mid         Milestone ID.
 * @param sid         Slice ID.
 * @param tid         Task ID to skip.
 * @param status      Which artifacts already exist (skips redundant writes).
 * @param reason      Recovery kind used in the placeholder text (e.g. "idle").
 * @param maxAttempts Attempt count embedded in the placeholder text.
 * @returns true if the expected artifacts are in place; false when the target
 *   directory can't be resolved or the plan checkbox couldn't be updated.
 */
export function skipExecuteTask(
base: string, mid: string, sid: string, tid: string,
status: { summaryExists: boolean; taskChecked: boolean },
reason: string, maxAttempts: number,
): boolean {
// Write a blocker task summary if missing.
if (!status.summaryExists) {
const tasksDir = resolveTasksDir(base, mid, sid);
const sDir = resolveSlicePath(base, mid, sid);
const targetDir = tasksDir ?? (sDir ? join(sDir, "tasks") : null);
if (!targetDir) return false;
if (!existsSync(targetDir)) mkdirSync(targetDir, { recursive: true });
const summaryPath = join(targetDir, buildTaskFileName(tid, "SUMMARY"));
const content = [
`# BLOCKER — task skipped by auto-mode recovery`,
``,
`Task \`${tid}\` in slice \`${sid}\` (milestone \`${mid}\`) failed to complete after ${reason} recovery exhausted ${maxAttempts} attempts.`,
``,
`This placeholder was written by auto-mode so the pipeline can advance.`,
`Review this task manually and replace this file with a real summary.`,
].join("\n");
writeFileSync(summaryPath, content, "utf-8");
}
// Mark [x] in the slice plan if not already checked.
if (!status.taskChecked) {
const planAbs = resolveSliceFile(base, mid, sid, "PLAN");
if (planAbs && existsSync(planAbs)) {
const planContent = readFileSync(planAbs, "utf-8");
// Escape regex metacharacters in the task ID before embedding it
const escapedTid = tid.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
// Captures around the unchecked box so "$1x] $2" flips [ ] to [x] in place
const re = new RegExp(`^(- \\[) \\] (\\*\\*${escapedTid}:)`, "m");
if (re.test(planContent)) {
writeFileSync(planAbs, planContent.replace(re, "$1x] $2"), "utf-8");
} else {
// Regex didn't match — checkbox format differs from expected pattern.
// Return false so callers know the plan was NOT updated and can
// fall through to other recovery strategies instead of assuming success.
return false;
}
}
}
return true;
}
// ─── Disk-backed completed-unit helpers ───────────────────────────────────────
/** Absolute path of the persisted completed-unit keys file under .gsd/. */
export function completedKeysPath(base: string): string {
  const stateDir = join(base, ".gsd");
  return join(stateDir, "completed-units.json");
}
/**
 * Persist a completed unit key to disk (read-modify-write append to a set).
 *
 * Reads the existing key list — a missing, corrupt, or non-array file starts
 * the list fresh — and appends the key only if absent. Writes atomically via
 * tmp-file + rename so a crash mid-write never leaves a truncated JSON file.
 * Also ensures the parent .gsd/ directory exists before writing (the sibling
 * writers in this module mkdir first; writeFileSync would otherwise throw
 * ENOENT on a fresh checkout).
 *
 * @param base Project base path.
 * @param key  Completed-unit idempotency key (e.g. "execute-task/m1/s1/t1").
 */
export function persistCompletedKey(base: string, key: string): void {
  const file = completedKeysPath(base);
  let keys: string[] = [];
  try {
    if (existsSync(file)) {
      const parsed: unknown = JSON.parse(readFileSync(file, "utf-8"));
      // Guard the shape: a corrupt-but-parseable file (object, number, mixed
      // array) must not poison the list with non-string entries.
      if (Array.isArray(parsed)) {
        keys = parsed.filter((k): k is string => typeof k === "string");
      }
    }
  } catch (e) { /* corrupt file — start fresh */ void e; }
  if (!keys.includes(key)) {
    keys.push(key);
    const dir = dirname(file);
    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
    // Atomic write: tmp file + rename prevents partial writes on crash
    const tmpFile = file + ".tmp";
    writeFileSync(tmpFile, JSON.stringify(keys), "utf-8");
    renameSync(tmpFile, file);
  }
}
/**
 * Remove a stale completed unit key from disk. Best-effort: any I/O or parse
 * failure is swallowed (non-fatal). The file is rewritten only when the key
 * was actually present, atomically via tmp-file + rename.
 */
export function removePersistedKey(base: string, key: string): void {
  const file = completedKeysPath(base);
  try {
    if (!existsSync(file)) return;
    const stored: string[] = JSON.parse(readFileSync(file, "utf-8"));
    const remaining = stored.filter(entry => entry !== key);
    // Key absent — nothing to rewrite
    if (remaining.length === stored.length) return;
    // Atomic write: tmp file + rename prevents partial writes on crash
    const scratch = `${file}.tmp`;
    writeFileSync(scratch, JSON.stringify(remaining), "utf-8");
    renameSync(scratch, file);
  } catch (e) { /* non-fatal: removePersistedKey failure */ void e; }
}
/**
 * Load all completed unit keys from disk into the in-memory set.
 * Best-effort: a missing, unreadable, or corrupt file leaves `target`
 * unchanged (non-fatal).
 */
export function loadPersistedKeys(base: string, target: Set<string>): void {
  const file = completedKeysPath(base);
  try {
    if (!existsSync(file)) return;
    const stored: string[] = JSON.parse(readFileSync(file, "utf-8"));
    stored.forEach(key => target.add(key));
  } catch (e) { /* non-fatal: loadPersistedKeys failure */ void e; }
}
// ─── Merge State Reconciliation ───────────────────────────────────────────────
/**
 * Detect leftover merge state from a prior session and reconcile it.
 * If MERGE_HEAD or SQUASH_MSG exists, check whether conflicts are resolved.
 * If resolved: finalize the commit. If still conflicted but every conflict is
 * in .gsd/ state files: auto-resolve by accepting "theirs" (#530). Otherwise
 * (code conflicts, or auto-resolve failed): abort the merge / drop SQUASH_MSG
 * and hard-reset the working tree.
 *
 * @param basePath Repository root (the directory containing .git).
 * @param ctx      Extension context; only ctx.ui.notify is used here.
 * @returns true if leftover state was found and handled — the caller must
 *   re-derive its state; false when there was no merge state to reconcile.
 */
export function reconcileMergeState(basePath: string, ctx: ExtensionContext): boolean {
const mergeHeadPath = join(basePath, ".git", "MERGE_HEAD");
const squashMsgPath = join(basePath, ".git", "SQUASH_MSG");
const hasMergeHead = existsSync(mergeHeadPath);
const hasSquashMsg = existsSync(squashMsgPath);
// Clean tree: nothing to reconcile
if (!hasMergeHead && !hasSquashMsg) return false;
const conflictedFiles = nativeConflictFiles(basePath);
if (conflictedFiles.length === 0) {
// All conflicts resolved — finalize the merge/squash commit
try {
nativeCommit(basePath, ""); // --no-edit equivalent: use empty message placeholder
const mode = hasMergeHead ? "merge" : "squash commit";
ctx.ui.notify(`Finalized leftover ${mode} from prior session.`, "info");
} catch {
// Commit may already exist; non-fatal
}
} else {
// Still conflicted — try auto-resolving .gsd/ state file conflicts (#530)
const gsdConflicts = conflictedFiles.filter(f => f.startsWith(".gsd/"));
const codeConflicts = conflictedFiles.filter(f => !f.startsWith(".gsd/"));
if (gsdConflicts.length > 0 && codeConflicts.length === 0) {
// All conflicts are in .gsd/ state files — auto-resolve by accepting theirs
let resolved = true;
try {
nativeCheckoutTheirs(basePath, gsdConflicts);
nativeAddPaths(basePath, gsdConflicts);
} catch {
resolved = false;
}
if (resolved) {
try {
nativeCommit(basePath, "chore: auto-resolve .gsd/ state file conflicts");
ctx.ui.notify(
`Auto-resolved ${gsdConflicts.length} .gsd/ state file conflict(s) from prior merge.`,
"info",
);
} catch {
resolved = false;
}
}
if (!resolved) {
// Auto-resolve failed partway — fall back to abort + hard reset
if (hasMergeHead) {
try { nativeMergeAbort(basePath); } catch { /* best-effort */ }
} else if (hasSquashMsg) {
try { unlinkSync(squashMsgPath); } catch { /* best-effort */ }
}
try { nativeResetHard(basePath); } catch { /* best-effort */ }
ctx.ui.notify(
"Detected leftover merge state — auto-resolve failed, cleaned up. Re-deriving state.",
"warning",
);
}
} else {
// Code conflicts present — abort and reset
if (hasMergeHead) {
try { nativeMergeAbort(basePath); } catch { /* best-effort */ }
} else if (hasSquashMsg) {
try { unlinkSync(squashMsgPath); } catch { /* best-effort */ }
}
try { nativeResetHard(basePath); } catch { /* best-effort */ }
ctx.ui.notify(
"Detected leftover merge state with unresolved conflicts — cleaned up. Re-deriving state.",
"warning",
);
}
}
return true;
}
// ─── Self-Heal Runtime Records ────────────────────────────────────────────────
/**
 * Self-heal: scan runtime records in .gsd/ and clear any where the expected
 * artifact already exists on disk. This repairs incomplete closeouts from
 * prior crashes — preventing spurious re-dispatch of already-completed units.
 *
 * Two healing cases per record:
 *  1. The artifact exists AND full verification passes — clear the record and
 *     persist its completion key.
 *  2. The record was dispatched over an hour ago with no artifact — assume a
 *     crashed process and clear it so the unit can be re-dispatched.
 *
 * Always swallows errors: self-heal must never block auto-mode start.
 *
 * @param base            Project base path.
 * @param ctx             Extension context; only ctx.ui.notify is used here.
 * @param completedKeySet In-memory completion set, updated in place when a
 *                        healed unit's key was missing from it.
 */
export async function selfHealRuntimeRecords(
base: string,
ctx: ExtensionContext,
completedKeySet: Set<string>,
): Promise<void> {
try {
const { listUnitRuntimeRecords } = await import("./unit-runtime.js");
const records = listUnitRuntimeRecords(base);
let healed = 0;
const STALE_THRESHOLD_MS = 60 * 60 * 1000; // 1 hour
const now = Date.now();
for (const record of records) {
const { unitType, unitId } = record;
const artifactPath = resolveExpectedArtifactPath(unitType, unitId, base);
// Case 1: Artifact exists — unit completed but closeout didn't finish.
// Use verifyExpectedArtifact (not just existsSync) so that execute-task
// also checks the plan checkbox is marked [x]. Without this, a task
// whose summary exists but checkbox is unchecked would be incorrectly
// marked as completed, causing deriveState to re-dispatch it endlessly.
if (artifactPath && existsSync(artifactPath) && verifyExpectedArtifact(unitType, unitId, base)) {
clearUnitRuntimeRecord(base, unitType, unitId);
// Also persist completion key if missing
const key = `${unitType}/${unitId}`;
if (!completedKeySet.has(key)) {
persistCompletedKey(base, key);
completedKeySet.add(key);
}
healed++;
continue;
}
// Case 2: No artifact but record is stale (dispatched > 1h ago, process crashed)
const age = now - (record.startedAt ?? 0);
if (record.phase === "dispatched" && age > STALE_THRESHOLD_MS) {
clearUnitRuntimeRecord(base, unitType, unitId);
healed++;
continue;
}
}
if (healed > 0) {
ctx.ui.notify(`Self-heal: cleared ${healed} stale runtime record(s).`, "info");
}
} catch (e) {
// Non-fatal — self-heal should never block auto-mode start
void e;
}
}
// ─── Loop Remediation ─────────────────────────────────────────────────────────
/**
 * Build concrete, manual remediation steps for a loop-detected unit failure.
 * Shown to the user when automatic reconciliation is not possible.
 *
 * @param unitType Unit kind (only execute-task, plan-slice, research-slice,
 *                 and complete-slice have scripted steps).
 * @param unitId   Slash-joined identifier: milestone[/slice[/task]].
 * @param base     Project base path.
 * @returns Multi-line step list, or null when no steps apply.
 */
export function buildLoopRemediationSteps(unitType: string, unitId: string, base: string): string | null {
  const [milestoneId, sliceId, taskId] = unitId.split("/");
  if (unitType === "execute-task" && milestoneId && sliceId && taskId) {
    const planRel = relSliceFile(base, milestoneId, sliceId, "PLAN");
    const summaryRel = relTaskFile(base, milestoneId, sliceId, taskId, "SUMMARY");
    return [
      ` 1. Write ${summaryRel} (even a partial summary is sufficient to unblock the pipeline)`,
      ` 2. Mark ${taskId} [x] in ${planRel}: change "- [ ] **${taskId}:" → "- [x] **${taskId}:"`,
      ` 3. Run \`gsd doctor\` to reconcile .gsd/ state`,
      ` 4. Resume auto-mode — it will pick up from the next task`,
    ].join("\n");
  }
  if ((unitType === "plan-slice" || unitType === "research-slice") && milestoneId && sliceId) {
    const artifactRel = unitType === "plan-slice"
      ? relSliceFile(base, milestoneId, sliceId, "PLAN")
      : relSliceFile(base, milestoneId, sliceId, "RESEARCH");
    return [
      ` 1. Write ${artifactRel} manually (or with the LLM in interactive mode)`,
      ` 2. Run \`gsd doctor\` to reconcile .gsd/ state`,
      ` 3. Resume auto-mode`,
    ].join("\n");
  }
  if (unitType === "complete-slice" && milestoneId && sliceId) {
    return [
      ` 1. Write the slice summary and UAT file for ${sliceId} in ${relSlicePath(base, milestoneId, sliceId)}`,
      ` 2. Mark ${sliceId} [x] in ${relMilestoneFile(base, milestoneId, "ROADMAP")}`,
      ` 3. Run \`gsd doctor\` to reconcile .gsd/ state`,
      ` 4. Resume auto-mode`,
    ].join("\n");
  }
  return null;
}

View file

@ -0,0 +1,54 @@
/**
 * Auto-mode Supervisor — SIGTERM handling and working-tree activity detection.
 *
 * Pure functions — no module-level globals or AutoContext dependency.
 */
import { clearLock } from "./crash-recovery.js";
import { nativeHasChanges } from "./native-git-bridge.js";
// ─── SIGTERM Handling ─────────────────────────────────────────────────────────
/**
 * Register a SIGTERM handler that clears the lock file and exits cleanly.
 * The active base path is captured at registration time, so the handler keeps
 * referencing the correct path even if module-level state changes later.
 * Any previously registered handler is removed before installing the new one.
 *
 * @param currentBasePath Base path whose lock file the handler will clear.
 * @param previousHandler Handler returned by an earlier call, or null.
 * @returns The newly installed handler, for later deregistration.
 */
export function registerSigtermHandler(
  currentBasePath: string,
  previousHandler: (() => void) | null,
): () => void {
  if (previousHandler !== null) {
    process.off("SIGTERM", previousHandler);
  }
  const onSigterm = (): void => {
    clearLock(currentBasePath);
    process.exit(0);
  };
  process.on("SIGTERM", onSigterm);
  return onSigterm;
}
/** Deregister the SIGTERM handler (called on stop/pause); no-op for null. */
export function deregisterSigtermHandler(handler: (() => void) | null): void {
  if (handler === null) return;
  process.off("SIGTERM", handler);
}
// ─── Working Tree Activity Detection ──────────────────────────────────────────
/**
 * Detect whether the agent is producing work on disk by asking the native
 * git bridge for any working-tree changes (staged, unstaged, or untracked).
 * Returns true when there are uncommitted changes — i.e. the agent is
 * actively working even though it hasn't signaled progress through runtime
 * records. Any bridge failure is treated as "no activity".
 */
export function detectWorkingTreeActivity(cwd: string): boolean {
  let dirty = false;
  try {
    dirty = nativeHasChanges(cwd);
  } catch {
    // Bridge unavailable or git error — report no activity rather than throw
    dirty = false;
  }
  return dirty;
}

View file

@ -6,19 +6,33 @@
* manages create, enter, detect, and teardown for auto-mode worktrees.
*/
import { existsSync, readFileSync, realpathSync, utimesSync } from "node:fs";
import { existsSync, cpSync, readFileSync, realpathSync, utimesSync } from "node:fs";
import { join, resolve } from "node:path";
import { execSync, execFileSync } from "node:child_process";
import { execSync } from "node:child_process";
import {
createWorktree,
removeWorktree,
worktreePath,
} from "./worktree-manager.js";
import { detectWorktreeName } from "./worktree.js";
import {
MergeConflictError,
} from "./git-service.js";
import { parseRoadmap } from "./files.js";
import { loadEffectiveGSDPreferences } from "./preferences.js";
import {
nativeGetCurrentBranch,
nativeWorkingTreeStatus,
nativeAddAll,
nativeCommit,
nativeCheckoutBranch,
nativeMergeSquash,
nativeConflictFiles,
nativeCheckoutTheirs,
nativeAddPaths,
nativeRmForce,
nativeBranchDelete,
} from "./native-git-bridge.js";
// ─── Module State ──────────────────────────────────────────────────────────
@ -60,18 +74,6 @@ function nudgeGitBranchCache(previousCwd: string): void {
}
}
/**
 * Return the currently checked-out branch name for `cwd` via
 * `git branch --show-current`, or "" when the command fails (e.g. not a git
 * repository, git missing, or an invalid cwd).
 */
function getCurrentBranch(cwd: string): string {
  try {
    const branchOutput = execSync("git branch --show-current", {
      cwd,
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "pipe"],
    });
    return branchOutput.trim();
  } catch {
    return "";
  }
}
// ─── Auto-Worktree Branch Naming ───────────────────────────────────────────
export function autoWorktreeBranch(milestoneId: string): string {
@ -90,6 +92,14 @@ export function autoWorktreeBranch(milestoneId: string): string {
export function createAutoWorktree(basePath: string, milestoneId: string): string {
const branch = autoWorktreeBranch(milestoneId);
const info = createWorktree(basePath, milestoneId, { branch });
// Copy .gsd/ planning artifacts from the source repo into the new worktree.
// Worktrees are fresh git checkouts — untracked files don't carry over.
// Planning artifacts may be untracked if the project's .gitignore had a
// blanket .gsd/ rule (pre-v2.14.0). Without this copy, auto-mode loops
// on plan-slice because the plan file doesn't exist in the worktree.
copyPlanningArtifacts(basePath, info.path);
const previousCwd = process.cwd();
try {
@ -107,6 +117,36 @@ export function createAutoWorktree(basePath: string, milestoneId: string): strin
return info.path;
}
/**
 * Copy .gsd/ planning artifacts from the source repo into a new worktree.
 * Covers the milestones/ directory plus DECISIONS.md, REQUIREMENTS.md,
 * PROJECT.md, and QUEUE.md. Runtime files (auto.lock, metrics.json, …) and
 * the worktrees/ directory are deliberately left behind. All copies are
 * best-effort: failures are swallowed because auto-mode can regenerate
 * planning artifacts.
 */
function copyPlanningArtifacts(srcBase: string, wtPath: string): void {
  const sourceDir = join(srcBase, ".gsd");
  if (!existsSync(sourceDir)) return;
  const targetDir = join(wtPath, ".gsd");

  // Planning directory: roadmaps, plans, and research notes.
  const milestonesSrc = join(sourceDir, "milestones");
  if (existsSync(milestonesSrc)) {
    try {
      cpSync(milestonesSrc, join(targetDir, "milestones"), { recursive: true, force: true });
    } catch {
      // non-fatal
    }
  }

  // Top-level planning documents.
  const planningDocs = ["DECISIONS.md", "REQUIREMENTS.md", "PROJECT.md", "QUEUE.md"];
  for (const doc of planningDocs) {
    const docSrc = join(sourceDir, doc);
    if (!existsSync(docSrc)) continue;
    try {
      cpSync(docSrc, join(targetDir, doc), { force: true });
    } catch {
      // non-fatal
    }
  }
}
/**
* Teardown an auto-worktree: chdir back to original base, then remove
* the worktree and its branch.
@ -138,7 +178,7 @@ export function isInAutoWorktree(basePath: string): boolean {
const resolvedBase = existsSync(basePath) ? realpathSync(basePath) : basePath;
const wtDir = join(resolvedBase, ".gsd", "worktrees");
if (!cwd.startsWith(wtDir)) return false;
const branch = getCurrentBranch(cwd);
const branch = nativeGetCurrentBranch(cwd);
return branch.startsWith("milestone/");
}
@ -185,6 +225,27 @@ export function getAutoWorktreeOriginalBase(): string | null {
return originalBase;
}
export function getActiveAutoWorktreeContext(): {
originalBase: string;
worktreeName: string;
branch: string;
} | null {
if (!originalBase) return null;
const cwd = process.cwd();
const resolvedBase = existsSync(originalBase) ? realpathSync(originalBase) : originalBase;
const wtDir = join(resolvedBase, ".gsd", "worktrees");
if (!cwd.startsWith(wtDir)) return null;
const worktreeName = detectWorktreeName(cwd);
if (!worktreeName) return null;
const branch = nativeGetCurrentBranch(cwd);
if (!branch.startsWith("milestone/")) return null;
return {
originalBase,
worktreeName,
branch,
};
}
// ─── Merge Milestone -> Main ───────────────────────────────────────────────
/**
@ -193,19 +254,11 @@ export function getAutoWorktreeOriginalBase(): string | null {
*/
function autoCommitDirtyState(cwd: string): boolean {
try {
const status = execSync("git status --porcelain", {
cwd,
stdio: ["ignore", "pipe", "pipe"],
encoding: "utf-8",
}).trim();
const status = nativeWorkingTreeStatus(cwd);
if (!status) return false;
execFileSync("git", ["add", "-A"], { cwd, stdio: "pipe" });
execFileSync("git", ["commit", "-m", "chore: auto-commit before milestone merge"], {
cwd,
stdio: ["ignore", "pipe", "pipe"],
encoding: "utf-8",
});
return true;
nativeAddAll(cwd);
const result = nativeCommit(cwd, "chore: auto-commit before milestone merge");
return result !== null;
} catch {
return false;
}
@ -253,11 +306,7 @@ export function mergeMilestoneToMain(
const mainBranch = prefs.main_branch || "main";
// 5. Checkout main
execSync(`git checkout ${mainBranch}`, {
cwd: originalBasePath_,
stdio: ["ignore", "pipe", "pipe"],
encoding: "utf-8",
});
nativeCheckoutBranch(originalBasePath_, mainBranch);
// 6. Build rich commit message
const milestoneTitle = roadmap.title.replace(/^M\d+:\s*/, "").trim() || milestoneId;
@ -269,49 +318,48 @@ export function mergeMilestoneToMain(
}
const commitMessage = subject + body;
// 7. Squash merge
try {
execSync(`git merge --squash ${milestoneBranch}`, {
cwd: originalBasePath_,
stdio: ["ignore", "pipe", "pipe"],
encoding: "utf-8",
});
} catch (mergeErr) {
// Check for real conflicts
try {
const conflictOutput = execSync("git diff --name-only --diff-filter=U", {
cwd: originalBasePath_,
encoding: "utf-8",
stdio: ["pipe", "pipe", "pipe"],
}).trim();
if (conflictOutput) {
const conflictedFiles = conflictOutput.split("\n").filter(Boolean);
throw new MergeConflictError(conflictedFiles, "squash", milestoneBranch, mainBranch);
// 7. Squash merge — auto-resolve .gsd/ state file conflicts (#530)
const mergeResult = nativeMergeSquash(originalBasePath_, milestoneBranch);
if (!mergeResult.success) {
// Check for conflicts — use merge result first, fall back to nativeConflictFiles
const conflictedFiles = mergeResult.conflicts.length > 0
? mergeResult.conflicts
: nativeConflictFiles(originalBasePath_);
if (conflictedFiles.length > 0) {
// Separate .gsd/ state file conflicts from real code conflicts.
// GSD state files (STATE.md, completed-units.json, auto.lock, etc.)
// diverge between branches during normal operation — always prefer the
// milestone branch version since it has the latest execution state.
const gsdConflicts = conflictedFiles.filter(f => f.startsWith(".gsd/"));
const codeConflicts = conflictedFiles.filter(f => !f.startsWith(".gsd/"));
// Auto-resolve .gsd/ conflicts by accepting the milestone branch version
if (gsdConflicts.length > 0) {
for (const gsdFile of gsdConflicts) {
try {
nativeCheckoutTheirs(originalBasePath_, [gsdFile]);
nativeAddPaths(originalBasePath_, [gsdFile]);
} catch {
// If checkout --theirs fails, try removing the file from the merge
// (it's a runtime file that shouldn't be committed anyway)
nativeRmForce(originalBasePath_, [gsdFile]);
}
}
}
// If there are still non-.gsd conflicts, escalate
if (codeConflicts.length > 0) {
throw new MergeConflictError(codeConflicts, "squash", milestoneBranch, mainBranch);
}
} catch (diffErr) {
if (diffErr instanceof MergeConflictError) throw diffErr;
}
// No conflicts detected — possibly "already up to date", fall through to commit
}
// 8. Commit (handle nothing-to-commit gracefully)
let nothingToCommit = false;
try {
execFileSync("git", ["commit", "-m", commitMessage], {
cwd: originalBasePath_,
stdio: ["ignore", "pipe", "pipe"],
encoding: "utf-8",
});
} catch (err: unknown) {
// execSync errors have stdout/stderr as properties -- check those for git's message
const errObj = err as { stdout?: string; stderr?: string; message?: string };
const combined = [errObj.stdout, errObj.stderr, errObj.message].filter(Boolean).join(" ");
if (combined.includes("nothing to commit") || combined.includes("nothing added to commit") || combined.includes("no changes added")) {
nothingToCommit = true;
} else {
throw err;
}
}
const commitResult = nativeCommit(originalBasePath_, commitMessage);
const nothingToCommit = commitResult === null;
// 9. Auto-push if enabled
let pushed = false;
@ -338,11 +386,7 @@ export function mergeMilestoneToMain(
// 11. Delete milestone branch (after worktree removal so ref is unlocked)
try {
execSync(`git branch -D ${milestoneBranch}`, {
cwd: originalBasePath_,
stdio: ["ignore", "pipe", "pipe"],
encoding: "utf-8",
});
nativeBranchDelete(originalBasePath_, milestoneBranch);
} catch {
// Best-effort
}

Some files were not shown because too many files have changed in this diff Show more