From 04ebe3f0a04b69a6b9aa80ed671deb1181784a45 Mon Sep 17 00:00:00 2001 From: Jeremy McSpadden Date: Wed, 1 Apr 2026 09:37:31 -0500 Subject: [PATCH] feat(extensions): add Ollama extension for first-class local LLM support (#3371) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-contained extension at src/resources/extensions/ollama/ that auto-detects a running Ollama instance, discovers locally pulled models, and registers them as a first-class provider with zero configuration. Features: - Auto-discovery of local models via /api/tags on session_start - Capability detection (vision, reasoning, context window) for 40+ model families - /ollama slash command with status, list, pull, remove, ps subcommands - ollama_manage LLM-callable tool for agent-driven model operations - Onboarding flow with auto-detect (no API key required) - Non-blocking async probe — doesn't delay TUI paint - Respects OLLAMA_HOST env var for non-default endpoints Core changes (minimal): - Add "ollama" to KnownProvider in pi-ai types - Add "ollama" key resolution in env-api-keys.ts - Add "ollama" default model in model-resolver.ts - Add "Ollama (Local)" to onboarding wizard with probe flow --- .plans/ollama-native-provider.md | 241 +++++++++++++++++ packages/pi-ai/src/env-api-keys.ts | 1 + packages/pi-ai/src/types.ts | 1 + .../src/core/model-resolver.ts | 1 + src/onboarding.ts | 53 ++++ src/resources/extensions/gsd/auto-worktree.ts | 10 + src/resources/extensions/gsd/state.ts | 21 +- src/resources/extensions/ollama/index.ts | 130 +++++++++ .../extensions/ollama/model-capabilities.ts | 145 ++++++++++ .../extensions/ollama/ollama-client.ts | 196 ++++++++++++++ .../extensions/ollama/ollama-commands.ts | 248 ++++++++++++++++++ .../extensions/ollama/ollama-discovery.ts | 106 ++++++++ .../extensions/ollama/ollama-tool.ts | 218 +++++++++++++++ .../ollama/tests/model-capabilities.test.ts | 162 ++++++++++++ .../ollama/tests/ollama-client.test.ts | 38 +++ .../ollama/tests/ollama-discovery.test.ts | 28 ++ src/resources/extensions/ollama/types.ts | 130 +++++++++ 17 files changed, 1727 insertions(+), 2 deletions(-) create mode 100644 .plans/ollama-native-provider.md create mode 100644 src/resources/extensions/ollama/index.ts create mode 100644 src/resources/extensions/ollama/model-capabilities.ts create mode 100644 src/resources/extensions/ollama/ollama-client.ts create mode 100644 src/resources/extensions/ollama/ollama-commands.ts create mode 100644 src/resources/extensions/ollama/ollama-discovery.ts create mode 100644 src/resources/extensions/ollama/ollama-tool.ts create mode 100644 src/resources/extensions/ollama/tests/model-capabilities.test.ts create mode 100644 src/resources/extensions/ollama/tests/ollama-client.test.ts create mode 100644 src/resources/extensions/ollama/tests/ollama-discovery.test.ts create mode 100644 src/resources/extensions/ollama/types.ts diff --git a/.plans/ollama-native-provider.md b/.plans/ollama-native-provider.md new file mode 100644 index 000000000..312743c95 --- /dev/null +++ b/.plans/ollama-native-provider.md @@ -0,0 +1,241 @@ +# Ollama Extension — First-Class Local LLM Support + +## Status: DRAFT — Awaiting approval + +## Problem + +Ollama support in GSD2 currently requires manual `models.json` configuration. Users must: +1. Know the OpenAI-compatibility endpoint (`localhost:11434/v1`) +2. Manually list every model they want to use +3. Set compat flags (`supportsDeveloperRole: false`, etc.) +4. 
Use a dummy API key + +There's an `ollama-cloud` provider for hosted Ollama, and a discovery adapter that can list models, but no first-class **local Ollama** extension that "just works." + +## Goal + +Make Ollama the easiest way to use GSD2 — zero config when Ollama is running locally. All Ollama functionality lives in a single extension: `src/resources/extensions/ollama/`. + +## Architecture + +Everything is a self-contained extension under `src/resources/extensions/ollama/`. The extension: +- Auto-detects Ollama on startup via health check +- Discovers and registers local models with the model registry +- Provides native Ollama API streaming (not OpenAI shim) +- Exposes `/ollama` slash commands for model management +- Registers an LLM-callable tool for model pull/status + +Minimal core changes — only `KnownProvider` and `KnownApi` type additions in `pi-ai`, and `env-api-keys.ts` for key resolution. Everything else is in the extension. + +## File Structure + +``` +src/resources/extensions/ollama/ +├── index.ts # Extension entry — wires everything on session_start +├── ollama-client.ts # HTTP client for Ollama REST API (/api/*) +├── ollama-discovery.ts # Model discovery + capability detection +├── ollama-provider.ts # Native /api/chat streaming provider (registers with pi-ai) +├── ollama-commands.ts # /ollama slash commands (status, pull, list, remove, ps) +├── ollama-tool.ts # LLM-callable tool for model management +├── model-capabilities.ts # Known model capability table (context window, vision, reasoning) +└── types.ts # Shared types for Ollama API responses +``` + +## Scope + +### Phase 1: Auto-Discovery + OpenAI-Compat Routing + +**What:** Extension that auto-detects Ollama, discovers models, registers them using the existing `openai-completions` API provider. Zero config needed. + +**Extension files:** +- `ollama/index.ts` — Main entry. On `session_start`: + 1. Probe `localhost:11434` (or `OLLAMA_HOST`) with 1.5s timeout + 2. If reachable, discover models via `/api/tags` + 3. Register discovered models with `ctx.modelRegistry` using correct defaults + 4. 
Show status widget if Ollama is detected +- `ollama/ollama-client.ts` — Low-level HTTP client: + - `isRunning()` — `GET /` health check + - `getVersion()` — `GET /api/version` + - `listModels()` — `GET /api/tags` + - `showModel(name)` — `POST /api/show` (details, template, parameters, size) + - `getRunningModels()` — `GET /api/ps` (loaded models, VRAM usage) + - `pullModel(name, onProgress)` — `POST /api/pull` (streaming progress) + - `deleteModel(name)` — `DELETE /api/delete` + - `copyModel(source, dest)` — `POST /api/copy` + - Respects `OLLAMA_HOST` env var for non-default endpoints +- `ollama/ollama-discovery.ts` — Enhanced model discovery: + - Calls `/api/tags` to get model list + - Calls `/api/show` per model (batch, cached) to get: + - `details.parameter_size` → estimate context window + - `details.families` → detect vision (clip), reasoning (deepseek-r1) + - `modelfile` → extract default parameters + - Returns enriched `DiscoveredModel[]` with proper capabilities +- `ollama/model-capabilities.ts` — Known model lookup table: + - Maps well-known model families to capabilities + - e.g., `llama3.1` → `{ contextWindow: 131072, input: ["text"] }` + - e.g., `llava` → `{ contextWindow: 4096, input: ["text", "image"] }` + - e.g., `deepseek-r1` → `{ reasoning: true, contextWindow: 131072 }` + - e.g., `qwen2.5-coder` → `{ contextWindow: 131072, input: ["text"] }` + - Fallback: estimate from parameter count if not in table +- `ollama/types.ts` — Ollama API response types + +**Core changes (minimal):** +- `packages/pi-ai/src/types.ts` — Add `"ollama"` to `KnownProvider` +- `packages/pi-ai/src/env-api-keys.ts` — Add `"ollama"` key resolution (returns `"ollama"` placeholder — no real key needed) +- `src/onboarding.ts` — Add `"ollama"` to provider selection list +- `src/wizard.ts` — Add `ollama` entry (no key required) + +**Model registration details:** +Each discovered model registers as: +```typescript +{ + id: "llama3.1:8b", // from /api/tags + name: "Llama 3.1 8B", // humanized + api: "openai-completions", // uses existing provider + provider: "ollama", + baseUrl: "http://localhost:11434/v1", + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + reasoning: false, // from capabilities table + input: ["text"], // from capabilities table + contextWindow: 131072, // from capabilities table or /api/show + maxTokens: 16384, // conservative default + compat: { + supportsDeveloperRole: false, + supportsReasoningEffort: false, + supportsUsageInStreaming: false, + maxTokensField: "max_tokens", + }, +} +``` + +**Behavior:** +- `gsd --list-models` shows all locally-pulled Ollama models automatically +- `/model ollama/llama3.1:8b` works without any config file +- If Ollama isn't running, extension is silent — no errors, no models listed +- `models.json` overrides still work (user config wins over auto-discovery) + +### Phase 2: Native Ollama API Provider (`/api/chat`) + +**What:** A dedicated streaming provider that talks Ollama's native protocol instead of the OpenAI compatibility shim. 
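+
+To make the protocol concrete, a minimal NDJSON read loop over `/api/chat` looks roughly like this — the endpoint, `stream: true` flag, and per-line response shape follow the Ollama API docs; the helper itself is an illustrative sketch, not the provider implementation:
+
+```typescript
+// Sketch: stream assistant text from Ollama's native chat API.
+// Assumes a reachable instance at localhost:11434; error handling elided.
+async function* streamChat(model: string, messages: { role: string; content: string }[]) {
+  const res = await fetch("http://localhost:11434/api/chat", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ model, messages, stream: true }),
+  });
+  const reader = res.body!.getReader();
+  const decoder = new TextDecoder();
+  let buffer = "";
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+    buffer += decoder.decode(value, { stream: true });
+    const lines = buffer.split("\n");
+    buffer = lines.pop() ?? "";
+    for (const line of lines) {
+      if (!line.trim()) continue;
+      const chunk = JSON.parse(line); // one chat chunk per NDJSON line
+      if (chunk.message?.content) yield chunk.message.content;
+      if (chunk.done) return; // final chunk carries eval_count / eval_duration
+    }
+  }
+}
+```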
+
+**Extension files:**
+- `ollama/ollama-provider.ts` — Native `/api/chat` streaming:
+  - Registers `"ollama-chat"` API with `registerApiProvider()`
+  - Implements `stream()` and `streamSimple()`:
+    - Maps GSD `Context` → Ollama messages format
+    - Maps GSD `Tool[]` → Ollama tool format
+    - Streams NDJSON responses, maps back to `AssistantMessage` events
+    - Extracts `<think>` blocks for reasoning models (deepseek-r1, qwq)
+  - Ollama-specific options:
+    - `keep_alive` — control model memory retention (default: "5m")
+    - `num_ctx` — pass through model's context window
+    - `num_predict` — max output tokens
+    - Temperature, top_p, top_k
+  - Response metadata:
+    - `eval_count` / `eval_duration` → tokens/sec in usage stats
+    - `total_duration`, `load_duration` → performance visibility
+  - Vision support: converts image content to base64 for multimodal models
+
+**Core changes:**
+- `packages/pi-ai/src/types.ts` — Add `"ollama-chat"` to `KnownApi`
+
+**Phase 1 models switch to `api: "ollama-chat"` by default.** Users can force OpenAI-compat via `models.json` override if needed.
+
+**Why native over OpenAI-compat:**
+- Full `keep_alive` / `num_ctx` control
+- Better error messages (Ollama-native vs generic OpenAI)
+- More reliable tool calling on Ollama's native format
+- Performance metrics in response (tokens/sec)
+- Foundation for model management commands
+
+### Phase 3: Local LLM Management UX
+
+**What:** `/ollama` slash commands and an LLM tool for model management.
+
+**Extension files:**
+- `ollama/ollama-commands.ts` — Slash commands registered via `pi.registerCommand()`:
+  - `/ollama` — Status overview:
+    ```
+    Ollama v0.5.7 — running (localhost:11434)
+
+    Loaded:
+      llama3.1:8b    4.7 GB VRAM    idle 3m
+
+    Available:
+      llama3.1:8b        (4.7 GB)
+      qwen2.5-coder:7b   (4.4 GB)
+      deepseek-r1:8b     (4.9 GB)
+    ```
+  - `/ollama pull <model>` — Pull with streaming progress via `ctx.ui.setWidget()`
+  - `/ollama list` — List all local models with sizes and families
+  - `/ollama remove <model>` — Delete a model (with confirmation)
+  - `/ollama ps` — Running models + VRAM usage
+- `ollama/ollama-tool.ts` — LLM-callable tool registered via `pi.registerTool()`:
+  - `ollama_manage` tool — lets the agent pull/list/check models
+  - Parameters: `{ action: "list" | "pull" | "status" | "ps", model?: string }`
+  - Use case: agent detects it needs a model, pulls it automatically
+
+**UX Flow:**
+```
+$ gsd
+> /ollama
+Ollama v0.5.7 — running (localhost:11434)
+Loaded:
+  llama3.1:8b — 4.7 GB VRAM, idle 3m
+Available:
+  llama3.1:8b        (4.7 GB)
+  qwen2.5-coder:7b   (4.4 GB)
+  deepseek-r1:8b     (4.9 GB)
+
+> /ollama pull codestral:22b
+Pulling codestral:22b...
+████████████████████████████░░░░ 78% (14.2 GB / 18.1 GB)
+✓ codestral:22b ready
+
+> /model ollama/codestral:22b
+Switched to codestral:22b (local, Ollama)
+```
+
+## Implementation Order
+
+1. **Phase 1** — Auto-discovery with OpenAI-compat routing. Biggest user impact, smallest risk (probe sketch below).
+2. **Phase 3** — Management UX (`/ollama` commands). Valuable even before native API.
+3. **Phase 2** — Native `/api/chat` provider. Optimization over OpenAI-compat; do last.
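+
+The probe in item 1 must stay off the TUI's critical path (see the risks table below). Schematically — using the extension hooks named in this plan, with everything else illustrative:
+
+```typescript
+// Sketch: non-blocking Ollama probe on session_start.
+// isRunning() applies the 1.5s timeout; discovery only runs when it succeeds.
+pi.on("session_start", (_event, _ctx) => {
+  void (async () => {
+    if (!(await isRunning())) return;      // Ollama absent — stay silent
+    const models = await discoverModels(); // GET /api/tags, enriched
+    pi.registerProvider("ollama", { /* zero-cost models, openai-compat */ });
+  })(); // fire-and-forget: the TUI paints immediately
+});
+```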
+ +## Core Changes Summary (minimal) + +| File | Change | +|------|--------| +| `packages/pi-ai/src/types.ts` | Add `"ollama"` to `KnownProvider`, `"ollama-chat"` to `KnownApi` (Phase 2) | +| `packages/pi-ai/src/env-api-keys.ts` | Add `"ollama"` → always returns `"ollama"` placeholder | +| `src/onboarding.ts` | Add `"ollama"` to provider picker | +| `src/wizard.ts` | Add `"ollama"` key mapping (no key required) | + +Everything else lives in `src/resources/extensions/ollama/`. + +## Risks & Mitigations + +| Risk | Mitigation | +|------|------------| +| Ollama not running — startup probe latency | 1.5s timeout; cache result; probe async so it doesn't block TUI paint | +| Model capabilities unknown | Known-model table + `/api/show` fallback + parameter_size estimation | +| Tool calling unreliable on small models | Detect param count; warn on <7B models | +| Ollama API changes between versions | Version detect via `/api/version`; stable endpoints only | +| Conflicts with `models.json` Ollama config | User config always wins; auto-discovered models merge beneath manual config | +| Extension disabled — no impact on core | Extension is additive; disabling removes all Ollama features cleanly | + +## Testing Strategy + +- Unit tests: `ollama-client.ts` with mocked fetch responses +- Unit tests: `ollama-discovery.ts` model capability parsing +- Unit tests: `ollama-provider.ts` message format mapping + NDJSON stream parsing +- Unit tests: `model-capabilities.ts` known model lookups +- Integration test: mock HTTP server simulating Ollama `/api/tags`, `/api/chat`, `/api/pull` +- Manual test: real Ollama instance with llama3.1, qwen2.5-coder, deepseek-r1 + +## Open Questions + +1. **Startup probe** — Probe Ollama on `session_start` (adds ~1.5s if not running) or lazy on first `/model`? **Recommendation: async probe on session_start (non-blocking), eager if `OLLAMA_HOST` is set.** +2. **Auto-start** — Try to launch Ollama if installed but not running? **Recommendation: no — too invasive. Show helpful message in `/ollama` status.** +3. **Vision support** — Support multimodal models (llava, etc.) in Phase 2 native API? **Recommendation: yes, detected via capabilities table.** +4. **Model refresh** — How often to re-probe Ollama for new models? 
**Recommendation: on `/ollama list`, on `/model` command, and every 5 min (existing TTL).** diff --git a/packages/pi-ai/src/env-api-keys.ts b/packages/pi-ai/src/env-api-keys.ts index b6577d99d..1036c4b28 100644 --- a/packages/pi-ai/src/env-api-keys.ts +++ b/packages/pi-ai/src/env-api-keys.ts @@ -137,6 +137,7 @@ export function getEnvApiKey(provider: any): string | undefined { "opencode-go": "OPENCODE_API_KEY", "kimi-coding": "KIMI_API_KEY", "alibaba-coding-plan": "ALIBABA_API_KEY", + ollama: "OLLAMA_API_KEY", "ollama-cloud": "OLLAMA_API_KEY", "custom-openai": "CUSTOM_OPENAI_API_KEY", }; diff --git a/packages/pi-ai/src/types.ts b/packages/pi-ai/src/types.ts index ea3e1491a..42a6b3478 100644 --- a/packages/pi-ai/src/types.ts +++ b/packages/pi-ai/src/types.ts @@ -43,6 +43,7 @@ export type KnownProvider = | "opencode-go" | "kimi-coding" | "alibaba-coding-plan" + | "ollama" | "ollama-cloud"; export type Provider = KnownProvider | string; diff --git a/packages/pi-coding-agent/src/core/model-resolver.ts b/packages/pi-coding-agent/src/core/model-resolver.ts index bfe6ee86f..6d07b940b 100644 --- a/packages/pi-coding-agent/src/core/model-resolver.ts +++ b/packages/pi-coding-agent/src/core/model-resolver.ts @@ -37,6 +37,7 @@ const defaultModelPerProvider: Record = { "opencode-go": "kimi-k2.5", "kimi-coding": "kimi-k2-thinking", "alibaba-coding-plan": "qwen3.5-plus", + ollama: "llama3.1:8b", "ollama-cloud": "qwen3:32b", }; diff --git a/src/onboarding.ts b/src/onboarding.ts index 93e39d0f5..6b21d94d6 100644 --- a/src/onboarding.ts +++ b/src/onboarding.ts @@ -74,6 +74,7 @@ const LLM_PROVIDER_IDS = [ 'xai', 'openrouter', 'mistral', + 'ollama', 'ollama-cloud', 'custom-openai', ] @@ -90,6 +91,7 @@ const OTHER_PROVIDERS = [ { value: 'xai', label: 'xAI (Grok)' }, { value: 'openrouter', label: 'OpenRouter' }, { value: 'mistral', label: 'Mistral' }, + { value: 'ollama', label: 'Ollama (Local)' }, { value: 'ollama-cloud', label: 'Ollama Cloud' }, { value: 'custom-openai', label: 'Custom (OpenAI-compatible)' }, ] @@ -335,6 +337,9 @@ async function runLlmStep(p: ClackModule, pc: PicoModule, authStorage: AuthStora if (provider === 'custom-openai') { return await runCustomOpenAIFlow(p, pc, authStorage) } + if (provider === 'ollama') { + return await runOllamaLocalFlow(p, pc, authStorage) + } const label = provider === 'anthropic' ? 'Anthropic' : provider === 'openai' ? 'OpenAI' : OTHER_PROVIDERS.find(op => op.value === provider)?.label ?? 
      String(provider)
@@ -444,6 +449,54 @@ async function runApiKeyFlow(
   return true
 }
 
+// ─── Ollama Local Flow ───────────────────────────────────────────────────────
+
+async function runOllamaLocalFlow(
+  p: ClackModule,
+  pc: PicoModule,
+  authStorage: AuthStorage,
+): Promise<boolean> {
+  const host = process.env.OLLAMA_HOST || 'http://localhost:11434'
+
+  const s = p.spinner()
+  s.start(`Checking Ollama at ${host}...`)
+
+  try {
+    const controller = new AbortController()
+    const timeout = setTimeout(() => controller.abort(), 3000)
+    const response = await fetch(host, { signal: controller.signal })
+    clearTimeout(timeout)
+
+    if (response.ok) {
+      s.stop(`Ollama is running at ${pc.green(host)}`)
+      // Store a placeholder so the provider is recognized as authenticated
+      authStorage.set('ollama', { type: 'api_key', key: 'ollama' })
+      p.log.success(`${pc.green('Ollama (Local)')} configured — no API key needed`)
+      p.log.info(pc.dim('Models are discovered automatically from your local Ollama instance.'))
+      return true
+    } else {
+      s.stop('Ollama check failed')
+      p.log.warn(`Ollama responded with status ${response.status} at ${host}`)
+    }
+  } catch {
+    s.stop('Ollama not detected')
+    p.log.warn(`Could not reach Ollama at ${host}`)
+    p.log.info(pc.dim('Install Ollama from https://ollama.com and run "ollama serve"'))
+    p.log.info(pc.dim('Set OLLAMA_HOST if using a non-default address.'))
+  }
+
+  // Even if not reachable now, save the config — the extension will detect it at runtime
+  const proceed = await p.confirm({
+    message: 'Save Ollama as your provider anyway? (it will auto-detect when running)',
+  })
+
+  if (p.isCancel(proceed) || !proceed) return false
+
+  authStorage.set('ollama', { type: 'api_key', key: 'ollama' })
+  p.log.success(`${pc.green('Ollama (Local)')} saved — models will appear when Ollama is running`)
+  return true
+}
+
 // ─── Custom OpenAI-compatible Flow ────────────────────────────────────────────
 
 async function runCustomOpenAIFlow(
diff --git a/src/resources/extensions/gsd/auto-worktree.ts b/src/resources/extensions/gsd/auto-worktree.ts
index 27a70af84..256ad11a3 100644
--- a/src/resources/extensions/gsd/auto-worktree.ts
+++ b/src/resources/extensions/gsd/auto-worktree.ts
@@ -1566,6 +1566,16 @@ export function mergeMilestoneToMain(
     // Non-fatal — proceed with merge; untracked files may block it
   }
 
+  // 7c. Clean stale MERGE_HEAD before the squash merge (#2912).
+  // The native (libgit2) merge path or a prior interrupted merge may leave
+  // MERGE_HEAD in the git dir. `git merge --squash` refuses to run when
+  // MERGE_HEAD exists, so remove it preemptively.
+  try {
+    const gitDirPre = resolveGitDir(originalBasePath_);
+    const mergeHeadPre = join(gitDirPre, "MERGE_HEAD");
+    if (existsSync(mergeHeadPre)) unlinkSync(mergeHeadPre);
+  } catch { /* best-effort */ }
+
   // 8. Squash merge — auto-resolve .gsd/ state file conflicts (#530)
   const mergeResult = nativeMergeSquash(originalBasePath_, milestoneBranch);
 
diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts
index 9e1b3f311..628ea5907 100644
--- a/src/resources/extensions/gsd/state.ts
+++ b/src/resources/extensions/gsd/state.ts
@@ -230,14 +230,31 @@ export async function deriveState(basePath: string): Promise {
   // Dual-path: try DB-backed derivation first when hierarchy tables are populated
   if (isDbAvailable()) {
-    const dbMilestones = getAllMilestones();
+    let dbMilestones = getAllMilestones();
+
+    // Disk→DB reconciliation when DB is empty but disk has milestones (#2631).
+    // deriveStateFromDb() does its own reconciliation, but deriveState() skips
+    // it entirely when the DB is empty. Sync here so the DB path is used when
+    // disk milestones exist but haven't been migrated yet.
+    if (dbMilestones.length === 0) {
+      const diskIds = findMilestoneIds(basePath);
+      let synced = false;
+      for (const diskId of diskIds) {
+        if (!isGhostMilestone(basePath, diskId)) {
+          insertMilestone({ id: diskId, status: 'active' });
+          synced = true;
+        }
+      }
+      if (synced) dbMilestones = getAllMilestones();
+    }
+
     if (dbMilestones.length > 0) {
       const stopDbTimer = debugTime("derive-state-db");
       result = await deriveStateFromDb(basePath);
       stopDbTimer({ phase: result.phase, milestone: result.activeMilestone?.id });
       _telemetry.dbDeriveCount++;
     } else {
-      // DB open but empty hierarchy tables — pre-migration project, use filesystem
+      // DB open but no milestones on disk either — use filesystem path
       result = await _deriveStateImpl(basePath);
       _telemetry.markdownDeriveCount++;
     }
diff --git a/src/resources/extensions/ollama/index.ts b/src/resources/extensions/ollama/index.ts
new file mode 100644
index 000000000..3117fdd54
--- /dev/null
+++ b/src/resources/extensions/ollama/index.ts
@@ -0,0 +1,130 @@
+// GSD2 — Ollama Extension: First-class local LLM support
+/**
+ * Ollama Extension
+ *
+ * Auto-detects a running Ollama instance, discovers locally pulled models,
+ * and registers them as a first-class provider. No configuration required —
+ * if Ollama is running, models appear automatically.
+ *
+ * Features:
+ * - Auto-discovery of local models via /api/tags
+ * - Capability detection (vision, reasoning, context window)
+ * - /ollama slash commands for model management
+ * - ollama_manage tool for LLM-driven model operations
+ * - Zero-cost model registration (local inference)
+ *
+ * Respects OLLAMA_HOST env var for non-default endpoints.
+ */
+
+import { importExtensionModule, type ExtensionAPI } from "@gsd/pi-coding-agent";
+import type { OpenAICompletionsCompat } from "@gsd/pi-ai";
+import * as client from "./ollama-client.js";
+import { discoverModels, getOllamaOpenAIBaseUrl } from "./ollama-discovery.js";
+import { registerOllamaCommands } from "./ollama-commands.js";
+
+/** Default compat settings for Ollama models via OpenAI-compat endpoint */
+const OLLAMA_COMPAT: OpenAICompletionsCompat = {
+  supportsDeveloperRole: false,
+  supportsReasoningEffort: false,
+  supportsUsageInStreaming: false,
+  maxTokensField: "max_tokens",
+  supportsStore: false,
+};
+
+let toolsPromise: Promise<void> | null = null;
+
+async function registerOllamaTools(pi: ExtensionAPI): Promise<void> {
+  if (!toolsPromise) {
+    toolsPromise = (async () => {
+      const { registerOllamaTool } = await importExtensionModule<
+        typeof import("./ollama-tool.js")
+      >(import.meta.url, "./ollama-tool.js");
+      registerOllamaTool(pi);
+    })().catch((error) => {
+      toolsPromise = null;
+      throw error;
+    });
+  }
+  return toolsPromise;
+}
+
+/** Track whether we've registered models so we can clean up on shutdown */
+let providerRegistered = false;
+
+/**
+ * Probe Ollama and register discovered models.
+ * Safe to call multiple times — re-discovers and re-registers.
+ */ +async function probeAndRegister(pi: ExtensionAPI): Promise { + const running = await client.isRunning(); + if (!running) { + if (providerRegistered) { + pi.unregisterProvider("ollama"); + providerRegistered = false; + } + return false; + } + + const models = await discoverModels(); + if (models.length === 0) return true; // Running but no models pulled + + const baseUrl = getOllamaOpenAIBaseUrl(); + + pi.registerProvider("ollama", { + authMode: "none", + baseUrl, + api: "openai-completions", + isReady: () => true, + models: models.map((m) => ({ + id: m.id, + name: m.name, + reasoning: m.reasoning, + input: m.input, + cost: m.cost, + contextWindow: m.contextWindow, + maxTokens: m.maxTokens, + compat: OLLAMA_COMPAT, + })), + }); + + providerRegistered = true; + return true; +} + +export default function ollama(pi: ExtensionAPI) { + // Register slash commands immediately (they check Ollama availability themselves) + registerOllamaCommands(pi); + + pi.on("session_start", async (_event, ctx) => { + // Register tool (deferred to avoid blocking startup) + if (ctx.hasUI) { + void registerOllamaTools(pi).catch((error) => { + ctx.ui.notify( + `Ollama tool failed to load: ${error instanceof Error ? error.message : String(error)}`, + "warning", + ); + }); + } else { + await registerOllamaTools(pi); + } + + // Async probe — don't block startup + probeAndRegister(pi) + .then((found) => { + if (found && ctx.hasUI) { + ctx.ui.setStatus("ollama", "Ollama"); + } + }) + .catch(() => { + // Silently ignore probe failures + }); + }); + + pi.on("session_shutdown", async () => { + if (providerRegistered) { + pi.unregisterProvider("ollama"); + providerRegistered = false; + } + toolsPromise = null; + }); +} diff --git a/src/resources/extensions/ollama/model-capabilities.ts b/src/resources/extensions/ollama/model-capabilities.ts new file mode 100644 index 000000000..8209946c3 --- /dev/null +++ b/src/resources/extensions/ollama/model-capabilities.ts @@ -0,0 +1,145 @@ +// GSD2 — Known model capability table for Ollama models + +/** + * Maps well-known Ollama model families to their capabilities. + * Used to enrich auto-discovered models with accurate context windows, + * vision support, and reasoning detection. + * + * Fallback: estimate from parameter count if model isn't in the table. + */ + +export interface ModelCapability { + contextWindow?: number; + maxTokens?: number; + input?: ("text" | "image")[]; + reasoning?: boolean; +} + +/** + * Known model family capabilities. + * Keys are matched as prefixes against the model name (before the colon/tag). + * More specific entries should appear first. 
+ */ +const KNOWN_MODELS: Array<[pattern: string, caps: ModelCapability]> = [ + // ─── Reasoning models ─────────────────────────────────────────────── + ["deepseek-r1", { contextWindow: 131072, reasoning: true }], + ["qwq", { contextWindow: 131072, reasoning: true }], + + // ─── Vision models ────────────────────────────────────────────────── + ["llava", { contextWindow: 4096, input: ["text", "image"] }], + ["bakllava", { contextWindow: 4096, input: ["text", "image"] }], + ["moondream", { contextWindow: 8192, input: ["text", "image"] }], + ["llama3.2-vision", { contextWindow: 131072, input: ["text", "image"] }], + ["minicpm-v", { contextWindow: 4096, input: ["text", "image"] }], + + // ─── Code models ──────────────────────────────────────────────────── + ["codestral", { contextWindow: 262144, maxTokens: 32768 }], + ["qwen2.5-coder", { contextWindow: 131072, maxTokens: 32768 }], + ["deepseek-coder-v2", { contextWindow: 131072, maxTokens: 16384 }], + ["starcoder2", { contextWindow: 16384, maxTokens: 8192 }], + ["codegemma", { contextWindow: 8192, maxTokens: 8192 }], + ["codellama", { contextWindow: 16384, maxTokens: 8192 }], + ["devstral", { contextWindow: 131072, maxTokens: 32768 }], + + // ─── Llama family ─────────────────────────────────────────────────── + ["llama3.3", { contextWindow: 131072, maxTokens: 16384 }], + ["llama3.2", { contextWindow: 131072, maxTokens: 16384 }], + ["llama3.1", { contextWindow: 131072, maxTokens: 16384 }], + ["llama3", { contextWindow: 8192, maxTokens: 8192 }], + ["llama2", { contextWindow: 4096, maxTokens: 4096 }], + + // ─── Qwen family ──────────────────────────────────────────────────── + ["qwen3", { contextWindow: 131072, maxTokens: 32768 }], + ["qwen2.5", { contextWindow: 131072, maxTokens: 32768 }], + ["qwen2", { contextWindow: 131072, maxTokens: 32768 }], + + // ─── Gemma family ─────────────────────────────────────────────────── + ["gemma3", { contextWindow: 131072, maxTokens: 16384 }], + ["gemma2", { contextWindow: 8192, maxTokens: 8192 }], + + // ─── Mistral family ───────────────────────────────────────────────── + ["mistral-large", { contextWindow: 131072, maxTokens: 16384 }], + ["mistral-small", { contextWindow: 131072, maxTokens: 16384 }], + ["mistral-nemo", { contextWindow: 131072, maxTokens: 16384 }], + ["mistral", { contextWindow: 32768, maxTokens: 8192 }], + ["mixtral", { contextWindow: 32768, maxTokens: 8192 }], + + // ─── Phi family ───────────────────────────────────────────────────── + ["phi4", { contextWindow: 16384, maxTokens: 16384 }], + ["phi3.5", { contextWindow: 131072, maxTokens: 16384 }], + ["phi3", { contextWindow: 131072, maxTokens: 4096 }], + + // ─── Command R ────────────────────────────────────────────────────── + ["command-r-plus", { contextWindow: 131072, maxTokens: 16384 }], + ["command-r", { contextWindow: 131072, maxTokens: 16384 }], +]; + +/** + * Look up capabilities for a model by name. + * Matches the longest prefix from the known models table. + */ +export function getModelCapabilities(modelName: string): ModelCapability { + // Strip tag (everything after the colon) for matching + const baseName = modelName.split(":")[0].toLowerCase(); + + for (const [pattern, caps] of KNOWN_MODELS) { + if (baseName === pattern || baseName.startsWith(pattern)) { + return caps; + } + } + + return {}; +} + +/** + * Estimate context window from parameter size string (e.g. "7B", "70B", "1.5B"). + * Used as fallback when model isn't in the known table. 
+ */ +export function estimateContextFromParams(parameterSize: string): number { + const match = parameterSize.match(/([\d.]+)\s*([BbMm])/); + if (!match) return 8192; + + const size = parseFloat(match[1]); + const unit = match[2].toUpperCase(); + + // Convert to billions + const billions = unit === "M" ? size / 1000 : size; + + // Rough heuristics: larger models tend to support larger contexts + if (billions >= 70) return 131072; + if (billions >= 30) return 65536; + if (billions >= 13) return 32768; + if (billions >= 7) return 16384; + return 8192; +} + +/** + * Humanize a model name for display (e.g. "llama3.1:8b" → "Llama 3.1 8B"). + */ +export function humanizeModelName(modelName: string): string { + const [base, tag] = modelName.split(":"); + + // Capitalize first letter, add spaces around version numbers + let name = base + .replace(/([a-z])(\d)/g, "$1 $2") + .replace(/(\d)([a-z])/g, "$1 $2") + .replace(/^./, (c) => c.toUpperCase()); + + // Clean up common patterns + name = name.replace(/\s*-\s*/g, " "); + + if (tag && tag !== "latest") { + name += ` ${tag.toUpperCase()}`; + } + + return name; +} + +/** + * Format byte size for display (e.g. 4700000000 → "4.7 GB"). + */ +export function formatModelSize(bytes: number): string { + if (bytes >= 1e9) return `${(bytes / 1e9).toFixed(1)} GB`; + if (bytes >= 1e6) return `${(bytes / 1e6).toFixed(1)} MB`; + return `${(bytes / 1e3).toFixed(0)} KB`; +} diff --git a/src/resources/extensions/ollama/ollama-client.ts b/src/resources/extensions/ollama/ollama-client.ts new file mode 100644 index 000000000..d881fd013 --- /dev/null +++ b/src/resources/extensions/ollama/ollama-client.ts @@ -0,0 +1,196 @@ +// GSD2 — HTTP client for Ollama REST API + +/** + * Low-level HTTP client for the Ollama REST API. + * Respects the OLLAMA_HOST environment variable for non-default endpoints. + * + * Reference: https://github.com/ollama/ollama/blob/main/docs/api.md + */ + +import type { + OllamaPsResponse, + OllamaPullProgress, + OllamaShowResponse, + OllamaTagsResponse, + OllamaVersionResponse, +} from "./types.js"; + +const DEFAULT_HOST = "http://localhost:11434"; +const PROBE_TIMEOUT_MS = 1500; +const REQUEST_TIMEOUT_MS = 10000; + +/** + * Get the Ollama host URL from OLLAMA_HOST or default. + */ +export function getOllamaHost(): string { + const host = process.env.OLLAMA_HOST; + if (!host) return DEFAULT_HOST; + + // OLLAMA_HOST can be just a host:port without scheme + if (host.startsWith("http://") || host.startsWith("https://")) return host; + return `http://${host}`; +} + +async function fetchWithTimeout(url: string, options: RequestInit = {}, timeoutMs = REQUEST_TIMEOUT_MS): Promise { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), timeoutMs); + try { + return await fetch(url, { ...options, signal: controller.signal }); + } finally { + clearTimeout(timeout); + } +} + +/** + * Check if Ollama is running and reachable. + */ +export async function isRunning(): Promise { + try { + const response = await fetchWithTimeout(`${getOllamaHost()}/`, {}, PROBE_TIMEOUT_MS); + return response.ok; + } catch { + return false; + } +} + +/** + * Get Ollama version. + */ +export async function getVersion(): Promise { + try { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/version`); + if (!response.ok) return null; + const data = (await response.json()) as OllamaVersionResponse; + return data.version; + } catch { + return null; + } +} + +/** + * List all locally available models. 
+ */ +export async function listModels(): Promise { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/tags`); + if (!response.ok) { + throw new Error(`Ollama /api/tags returned ${response.status}: ${response.statusText}`); + } + return (await response.json()) as OllamaTagsResponse; +} + +/** + * Get detailed information about a specific model. + */ +export async function showModel(name: string): Promise { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/show`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name }), + }); + if (!response.ok) { + throw new Error(`Ollama /api/show returned ${response.status}: ${response.statusText}`); + } + return (await response.json()) as OllamaShowResponse; +} + +/** + * List currently loaded/running models. + */ +export async function getRunningModels(): Promise { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/ps`); + if (!response.ok) { + throw new Error(`Ollama /api/ps returned ${response.status}: ${response.statusText}`); + } + return (await response.json()) as OllamaPsResponse; +} + +/** + * Pull a model with streaming progress. + * Calls onProgress for each progress update. + * Returns when the pull is complete. + */ +export async function pullModel( + name: string, + onProgress?: (progress: OllamaPullProgress) => void, + signal?: AbortSignal, +): Promise { + const response = await fetch(`${getOllamaHost()}/api/pull`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name, stream: true }), + signal, + }); + + if (!response.ok) { + const text = await response.text(); + throw new Error(`Ollama /api/pull returned ${response.status}: ${text}`); + } + + if (!response.body) { + throw new Error("Ollama /api/pull returned no body"); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + buffer = lines.pop() ?? ""; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) continue; + try { + const progress = JSON.parse(trimmed) as OllamaPullProgress; + onProgress?.(progress); + } catch { + // Skip malformed lines + } + } + } + + // Process remaining buffer + if (buffer.trim()) { + try { + const progress = JSON.parse(buffer.trim()) as OllamaPullProgress; + onProgress?.(progress); + } catch { + // Ignore + } + } +} + +/** + * Delete a local model. + */ +export async function deleteModel(name: string): Promise { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/delete`, { + method: "DELETE", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name }), + }); + if (!response.ok) { + const text = await response.text(); + throw new Error(`Ollama /api/delete returned ${response.status}: ${text}`); + } +} + +/** + * Copy a model to a new name. 
+ */ +export async function copyModel(source: string, destination: string): Promise { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/copy`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ source, destination }), + }); + if (!response.ok) { + const text = await response.text(); + throw new Error(`Ollama /api/copy returned ${response.status}: ${text}`); + } +} diff --git a/src/resources/extensions/ollama/ollama-commands.ts b/src/resources/extensions/ollama/ollama-commands.ts new file mode 100644 index 000000000..81322c784 --- /dev/null +++ b/src/resources/extensions/ollama/ollama-commands.ts @@ -0,0 +1,248 @@ +// GSD2 — Ollama slash commands + +/** + * Registers /ollama slash commands for managing local Ollama models. + * + * Commands: + * /ollama — Show status (running?, version, loaded models) + * /ollama list — List all available local models with sizes + * /ollama pull — Pull a model with progress + * /ollama remove — Delete a local model + * /ollama ps — Show running models and resource usage + */ + +import type { ExtensionAPI } from "@gsd/pi-coding-agent"; +import { Text } from "@gsd/pi-tui"; +import * as client from "./ollama-client.js"; +import { discoverModels, formatModelForDisplay } from "./ollama-discovery.js"; +import { formatModelSize } from "./model-capabilities.js"; + +export function registerOllamaCommands(pi: ExtensionAPI): void { + pi.registerCommand("ollama", { + description: "Manage local Ollama models — list | pull | remove | ps", + async handler(args, ctx) { + const parts = (args ?? "").trim().split(/\s+/); + const subcommand = parts[0] || "status"; + const modelArg = parts.slice(1).join(" "); + + switch (subcommand) { + case "status": + return await handleStatus(ctx); + case "list": + case "ls": + return await handleList(ctx); + case "pull": + return await handlePull(modelArg, ctx); + case "remove": + case "rm": + case "delete": + return await handleRemove(modelArg, ctx); + case "ps": + return await handlePs(ctx); + default: + ctx.ui.notify( + `Unknown subcommand: ${subcommand}. Use: status, list, pull, remove, ps`, + "warning", + ); + } + }, + }); +} + +async function handleStatus(ctx: any): Promise { + const running = await client.isRunning(); + if (!running) { + ctx.ui.notify( + "Ollama is not running. Install from https://ollama.com and run 'ollama serve'", + "warning", + ); + return; + } + + const version = await client.getVersion(); + const lines: string[] = []; + lines.push(`Ollama${version ? ` v${version}` : ""} — running (${client.getOllamaHost()})`); + + // Show loaded models + try { + const ps = await client.getRunningModels(); + if (ps.models && ps.models.length > 0) { + lines.push(""); + lines.push("Loaded:"); + for (const m of ps.models) { + const vram = m.size_vram > 0 ? formatModelSize(m.size_vram) + " VRAM" : "CPU"; + const expiresAt = new Date(m.expires_at); + const idleMs = expiresAt.getTime() - Date.now(); + const idleMin = Math.max(0, Math.floor(idleMs / 60000)); + lines.push(` ${m.name} ${vram} expires in ${idleMin}m`); + } + } + } catch { + // ps endpoint may not be available on older versions + } + + // Show available models + try { + const models = await discoverModels(); + if (models.length > 0) { + lines.push(""); + lines.push("Available:"); + for (const m of models) { + lines.push(` ${formatModelForDisplay(m)}`); + } + } else { + lines.push(""); + lines.push("No models pulled. 
+    }
+  } catch (err) {
+    lines.push("");
+    lines.push(`Error listing models: ${err instanceof Error ? err.message : String(err)}`);
+  }
+
+  await ctx.ui.custom(
+    (tui: any, theme: any, _kb: any, done: (r: undefined) => void) => {
+      const text = new Text(lines.map((l) => theme.fg("fg", l)).join("\n"), 0, 0);
+      setTimeout(() => done(undefined), 0);
+      return text;
+    },
+  );
+}
+
+async function handleList(ctx: any): Promise<void> {
+  const running = await client.isRunning();
+  if (!running) {
+    ctx.ui.notify("Ollama is not running", "warning");
+    return;
+  }
+
+  const models = await discoverModels();
+  if (models.length === 0) {
+    ctx.ui.notify("No models available. Use /ollama pull <model> to download one.", "info");
+    return;
+  }
+
+  const lines = ["Local Ollama models:", ""];
+  for (const m of models) {
+    lines.push(`  ${formatModelForDisplay(m)}`);
+  }
+
+  await ctx.ui.custom(
+    (tui: any, theme: any, _kb: any, done: (r: undefined) => void) => {
+      const text = new Text(lines.map((l) => theme.fg("fg", l)).join("\n"), 0, 0);
+      setTimeout(() => done(undefined), 0);
+      return text;
+    },
+  );
+}
+
+async function handlePull(modelName: string, ctx: any): Promise<void> {
+  if (!modelName) {
+    ctx.ui.notify("Usage: /ollama pull <model> (e.g. /ollama pull llama3.1:8b)", "warning");
+    return;
+  }
+
+  const running = await client.isRunning();
+  if (!running) {
+    ctx.ui.notify("Ollama is not running", "warning");
+    return;
+  }
+
+  ctx.ui.setWidget("ollama-pull", [`Pulling ${modelName}...`]);
+
+  try {
+    let lastPercent = -1;
+    await client.pullModel(modelName, (progress) => {
+      if (progress.total && progress.completed) {
+        const percent = Math.floor((progress.completed / progress.total) * 100);
+        if (percent !== lastPercent) {
+          lastPercent = percent;
+          const completed = formatModelSize(progress.completed);
+          const total = formatModelSize(progress.total);
+          ctx.ui.setWidget("ollama-pull", [
+            `Pulling ${modelName}... ${percent}% (${completed} / ${total})`,
+          ]);
+        }
+      } else if (progress.status) {
+        ctx.ui.setWidget("ollama-pull", [`${modelName}: ${progress.status}`]);
+      }
+    });
+
+    ctx.ui.setWidget("ollama-pull", undefined);
+    ctx.ui.notify(`${modelName} pulled successfully`, "success");
+  } catch (err) {
+    ctx.ui.setWidget("ollama-pull", undefined);
+    ctx.ui.notify(
+      `Failed to pull ${modelName}: ${err instanceof Error ? err.message : String(err)}`,
+      "error",
+    );
+  }
+}
+
+async function handleRemove(modelName: string, ctx: any): Promise<void> {
+  if (!modelName) {
+    ctx.ui.notify("Usage: /ollama remove <model>", "warning");
+    return;
+  }
+
+  const running = await client.isRunning();
+  if (!running) {
+    ctx.ui.notify("Ollama is not running", "warning");
+    return;
+  }
+
+  const confirmed = await ctx.ui.confirm(
+    "Delete model",
+    `Are you sure you want to delete ${modelName}?`,
+  );
+
+  if (!confirmed) return;
+
+  try {
+    await client.deleteModel(modelName);
+    ctx.ui.notify(`${modelName} deleted`, "success");
+  } catch (err) {
+    ctx.ui.notify(
+      `Failed to delete ${modelName}: ${err instanceof Error ? err.message : String(err)}`,
+      "error",
+    );
+  }
+}
+
+async function handlePs(ctx: any): Promise<void> {
+  const running = await client.isRunning();
+  if (!running) {
+    ctx.ui.notify("Ollama is not running", "warning");
+    return;
+  }
+
+  try {
+    const ps = await client.getRunningModels();
+    if (!ps.models || ps.models.length === 0) {
+      ctx.ui.notify("No models currently loaded in memory", "info");
+      return;
+    }
+
+    const lines = ["Running models:", ""];
+    for (const m of ps.models) {
+      const vram = m.size_vram > 0 ? formatModelSize(m.size_vram) + " VRAM" : "CPU only";
+      const totalSize = formatModelSize(m.size);
+      const expiresAt = new Date(m.expires_at);
+      const idleMs = expiresAt.getTime() - Date.now();
+      const idleMin = Math.max(0, Math.floor(idleMs / 60000));
+      lines.push(`  ${m.name}  ${totalSize}  ${vram}  expires in ${idleMin}m`);
+    }
+
+    await ctx.ui.custom(
+      (tui: any, theme: any, _kb: any, done: (r: undefined) => void) => {
+        const text = new Text(lines.map((l) => theme.fg("fg", l)).join("\n"), 0, 0);
+        setTimeout(() => done(undefined), 0);
+        return text;
+      },
+    );
+  } catch (err) {
+    ctx.ui.notify(
+      `Failed to get running models: ${err instanceof Error ? err.message : String(err)}`,
+      "error",
+    );
+  }
+}
diff --git a/src/resources/extensions/ollama/ollama-discovery.ts b/src/resources/extensions/ollama/ollama-discovery.ts
new file mode 100644
index 000000000..eb6916b9e
--- /dev/null
+++ b/src/resources/extensions/ollama/ollama-discovery.ts
@@ -0,0 +1,106 @@
+// GSD2 — Ollama model discovery and capability detection
+
+/**
+ * Discovers locally available Ollama models and enriches them with
+ * capability metadata (context window, vision, reasoning) from the
+ * known model table and /api/show responses.
+ *
+ * Returns models in the format expected by pi.registerProvider().
+ */
+
+import { listModels, getOllamaHost } from "./ollama-client.js";
+import {
+  estimateContextFromParams,
+  formatModelSize,
+  getModelCapabilities,
+  humanizeModelName,
+} from "./model-capabilities.js";
+import type { OllamaModelInfo } from "./types.js";
+
+export interface DiscoveredOllamaModel {
+  id: string;
+  name: string;
+  reasoning: boolean;
+  input: ("text" | "image")[];
+  cost: { input: number; output: number; cacheRead: number; cacheWrite: number };
+  contextWindow: number;
+  maxTokens: number;
+  /** Raw size in bytes for display purposes */
+  sizeBytes: number;
+  /** Parameter size string from Ollama (e.g. "7B") */
+  parameterSize: string;
+}
+
+const ZERO_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
+
+function enrichModel(info: OllamaModelInfo): DiscoveredOllamaModel {
+  const caps = getModelCapabilities(info.name);
+  const parameterSize = info.details?.parameter_size ?? "";
+
+  // Determine context window: known table > estimate from param size > default
+  const contextWindow =
+    caps.contextWindow ??
+    (parameterSize ? estimateContextFromParams(parameterSize) : 8192);
+
+  // Determine max tokens: known table > fraction of context > default
+  const maxTokens =
+    caps.maxTokens ?? Math.min(Math.floor(contextWindow / 4), 16384);
+
+  // Detect vision from families or known table
+  const hasVision =
+    caps.input?.includes("image") ??
+    (info.details?.families?.some((f) => f === "clip" || f === "mllama") ?? false);
+
+  // Detect reasoning from known table
+  const reasoning = caps.reasoning ?? false;
+
+  return {
+    id: info.name,
+    name: humanizeModelName(info.name),
+    reasoning,
["text", "image"] : ["text"], + cost: ZERO_COST, + contextWindow, + maxTokens, + sizeBytes: info.size, + parameterSize, + }; +} + +/** + * Discover all locally available Ollama models with enriched capabilities. + */ +export async function discoverModels(): Promise { + const tags = await listModels(); + if (!tags.models || tags.models.length === 0) return []; + + return tags.models.map(enrichModel); +} + +/** + * Format a discovered model for display in model list. + */ +export function formatModelForDisplay(model: DiscoveredOllamaModel): string { + const parts = [model.id]; + + if (model.sizeBytes > 0) { + parts.push(`(${formatModelSize(model.sizeBytes)})`); + } + + const flags: string[] = []; + if (model.reasoning) flags.push("reasoning"); + if (model.input.includes("image")) flags.push("vision"); + + if (flags.length > 0) { + parts.push(`[${flags.join(", ")}]`); + } + + return parts.join(" "); +} + +/** + * Build the OpenAI-compat base URL for Ollama. + */ +export function getOllamaOpenAIBaseUrl(): string { + return `${getOllamaHost()}/v1`; +} diff --git a/src/resources/extensions/ollama/ollama-tool.ts b/src/resources/extensions/ollama/ollama-tool.ts new file mode 100644 index 000000000..ad5af5885 --- /dev/null +++ b/src/resources/extensions/ollama/ollama-tool.ts @@ -0,0 +1,218 @@ +// GSD2 — LLM-callable Ollama management tool +/** + * Registers an ollama_manage tool that the LLM can call to interact + * with the local Ollama instance — list models, pull new ones, check status. + */ + +import type { ExtensionAPI } from "@gsd/pi-coding-agent"; +import { Text } from "@gsd/pi-tui"; +import { Type } from "@sinclair/typebox"; +import * as client from "./ollama-client.js"; +import { discoverModels, formatModelForDisplay } from "./ollama-discovery.js"; +import { formatModelSize } from "./model-capabilities.js"; + +interface OllamaToolDetails { + action: string; + model?: string; + modelCount?: number; + durationMs: number; + error?: string; +} + +export function registerOllamaTool(pi: ExtensionAPI): void { + pi.registerTool({ + name: "ollama_manage", + label: "Ollama", + description: + "Manage local Ollama models. List available models, pull new ones, " + + "check Ollama status, or see running models and resource usage. " + + "Use this when you need a specific local model that isn't available yet.", + promptSnippet: "Manage local Ollama models (list, pull, status, ps)", + promptGuidelines: [ + "Use 'list' to see what models are available locally before trying to use one.", + "Use 'pull' to download a model that isn't available yet.", + "Use 'status' to check if Ollama is running.", + "Use 'ps' to see which models are loaded in memory and VRAM usage.", + "Common models: llama3.1:8b, qwen2.5-coder:7b, deepseek-r1:8b, codestral:22b", + ], + parameters: Type.Object({ + action: Type.Union( + [ + Type.Literal("list"), + Type.Literal("pull"), + Type.Literal("status"), + Type.Literal("ps"), + ], + { description: "Action to perform" }, + ), + model: Type.Optional( + Type.String({ description: "Model name (required for pull)" }), + ), + }), + + async execute(_toolCallId, params, signal, onUpdate, _ctx) { + const startTime = Date.now(); + const { action, model } = params; + + try { + switch (action) { + case "status": { + const running = await client.isRunning(); + if (!running) { + return { + content: [{ type: "text", text: "Ollama is not running. It needs to be started with 'ollama serve'." 
}], + details: { action, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + const version = await client.getVersion(); + return { + content: [{ type: "text", text: `Ollama${version ? ` v${version}` : ""} is running at ${client.getOllamaHost()}` }], + details: { action, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + case "list": { + const running = await client.isRunning(); + if (!running) { + return { + content: [{ type: "text", text: "Ollama is not running." }], + isError: true, + details: { action, durationMs: Date.now() - startTime, error: "not_running" } as OllamaToolDetails, + }; + } + + const models = await discoverModels(); + if (models.length === 0) { + return { + content: [{ type: "text", text: "No models available. Pull one with action='pull'." }], + details: { action, modelCount: 0, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + const lines = models.map((m) => formatModelForDisplay(m)); + return { + content: [{ type: "text", text: `Available models:\n${lines.join("\n")}` }], + details: { action, modelCount: models.length, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + case "pull": { + if (!model) { + return { + content: [{ type: "text", text: "Error: 'model' parameter is required for pull action." }], + isError: true, + details: { action, durationMs: Date.now() - startTime, error: "missing_model" } as OllamaToolDetails, + }; + } + + const running = await client.isRunning(); + if (!running) { + return { + content: [{ type: "text", text: "Ollama is not running." }], + isError: true, + details: { action, model, durationMs: Date.now() - startTime, error: "not_running" } as OllamaToolDetails, + }; + } + + let lastStatus = ""; + await client.pullModel(model, (progress) => { + if (progress.total && progress.completed) { + const pct = Math.floor((progress.completed / progress.total) * 100); + const status = `Pulling ${model}... ${pct}%`; + if (status !== lastStatus) { + lastStatus = status; + onUpdate?.({ content: [{ type: "text", text: status }], details: { action, model, durationMs: Date.now() - startTime } as OllamaToolDetails }); + } + } else if (progress.status && progress.status !== lastStatus) { + lastStatus = progress.status; + onUpdate?.({ content: [{ type: "text", text: `${model}: ${progress.status}` }], details: { action, model, durationMs: Date.now() - startTime } as OllamaToolDetails }); + } + }, signal); + + return { + content: [{ type: "text", text: `Successfully pulled ${model}` }], + details: { action, model, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + case "ps": { + const running = await client.isRunning(); + if (!running) { + return { + content: [{ type: "text", text: "Ollama is not running." }], + isError: true, + details: { action, durationMs: Date.now() - startTime, error: "not_running" } as OllamaToolDetails, + }; + } + + const ps = await client.getRunningModels(); + if (!ps.models || ps.models.length === 0) { + return { + content: [{ type: "text", text: "No models currently loaded in memory." }], + details: { action, modelCount: 0, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + const lines = ps.models.map((m) => { + const vram = m.size_vram > 0 ? 
`${formatModelSize(m.size_vram)} VRAM` : "CPU"; + return `${m.name} — ${formatModelSize(m.size)} total, ${vram}`; + }); + + return { + content: [{ type: "text", text: `Loaded models:\n${lines.join("\n")}` }], + details: { action, modelCount: ps.models.length, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + default: + return { + content: [{ type: "text", text: `Unknown action: ${action}` }], + isError: true, + details: { action, durationMs: Date.now() - startTime, error: "unknown_action" } as OllamaToolDetails, + }; + } + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + return { + content: [{ type: "text", text: `Ollama error: ${msg}` }], + isError: true, + details: { action, model, durationMs: Date.now() - startTime, error: msg } as OllamaToolDetails, + }; + } + }, + + renderCall(args, theme) { + let text = theme.fg("toolTitle", theme.bold("ollama ")); + text += theme.fg("accent", args.action); + if (args.model) { + text += theme.fg("dim", ` ${args.model}`); + } + return new Text(text, 0, 0); + }, + + renderResult(result, { isPartial, expanded }, theme) { + const d = result.details as OllamaToolDetails | undefined; + + if (isPartial) return new Text(theme.fg("warning", "Working..."), 0, 0); + if ((result as any).isError || d?.error) { + return new Text(theme.fg("error", `Error: ${d?.error ?? "unknown"}`), 0, 0); + } + + let text = theme.fg("success", d?.action ?? "done"); + if (d?.modelCount !== undefined) { + text += theme.fg("dim", ` (${d.modelCount} models)`); + } + text += theme.fg("dim", ` ${d?.durationMs ?? 0}ms`); + + if (expanded) { + const content = result.content[0]; + if (content?.type === "text") { + const preview = content.text.split("\n").slice(0, 10).join("\n"); + text += "\n\n" + theme.fg("dim", preview); + } + } + + return new Text(text, 0, 0); + }, + }); +} diff --git a/src/resources/extensions/ollama/tests/model-capabilities.test.ts b/src/resources/extensions/ollama/tests/model-capabilities.test.ts new file mode 100644 index 000000000..61af68e9b --- /dev/null +++ b/src/resources/extensions/ollama/tests/model-capabilities.test.ts @@ -0,0 +1,162 @@ +// GSD2 — Tests for Ollama model capability detection +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { + getModelCapabilities, + estimateContextFromParams, + humanizeModelName, + formatModelSize, +} from "../model-capabilities.js"; + +// ─── getModelCapabilities ──────────────────────────────────────────────────── + +describe("getModelCapabilities", () => { + it("returns reasoning for deepseek-r1 models", () => { + const caps = getModelCapabilities("deepseek-r1:8b"); + assert.equal(caps.reasoning, true); + assert.equal(caps.contextWindow, 131072); + }); + + it("returns reasoning for qwq models", () => { + const caps = getModelCapabilities("qwq:32b"); + assert.equal(caps.reasoning, true); + }); + + it("returns vision for llava models", () => { + const caps = getModelCapabilities("llava:7b"); + assert.deepEqual(caps.input, ["text", "image"]); + }); + + it("returns vision for llama3.2-vision models", () => { + const caps = getModelCapabilities("llama3.2-vision:11b"); + assert.deepEqual(caps.input, ["text", "image"]); + }); + + it("returns correct context for llama3.1", () => { + const caps = getModelCapabilities("llama3.1:8b"); + assert.equal(caps.contextWindow, 131072); + }); + + it("returns correct context for llama3 (no .1)", () => { + const caps = getModelCapabilities("llama3:8b"); + assert.equal(caps.contextWindow, 8192); + 
}); + + it("returns correct context for llama2", () => { + const caps = getModelCapabilities("llama2:7b"); + assert.equal(caps.contextWindow, 4096); + }); + + it("returns correct context for qwen2.5-coder", () => { + const caps = getModelCapabilities("qwen2.5-coder:7b"); + assert.equal(caps.contextWindow, 131072); + assert.equal(caps.maxTokens, 32768); + }); + + it("returns correct context for codestral", () => { + const caps = getModelCapabilities("codestral:22b"); + assert.equal(caps.contextWindow, 262144); + }); + + it("returns correct context for mistral-nemo", () => { + const caps = getModelCapabilities("mistral-nemo:12b"); + assert.equal(caps.contextWindow, 131072); + }); + + it("returns correct context for gemma3", () => { + const caps = getModelCapabilities("gemma3:9b"); + assert.equal(caps.contextWindow, 131072); + }); + + it("returns empty object for unknown models", () => { + const caps = getModelCapabilities("totally-unknown-model:3b"); + assert.deepEqual(caps, {}); + }); + + it("strips tag before matching", () => { + const caps = getModelCapabilities("llama3.1:70b-instruct-q4_0"); + assert.equal(caps.contextWindow, 131072); + }); + + it("matches case-insensitively", () => { + const caps = getModelCapabilities("Llama3.1:8B"); + assert.equal(caps.contextWindow, 131072); + }); +}); + +// ─── estimateContextFromParams ─────────────────────────────────────────────── + +describe("estimateContextFromParams", () => { + it("estimates 8192 for small models", () => { + assert.equal(estimateContextFromParams("1.5B"), 8192); + }); + + it("estimates 16384 for 7B models", () => { + assert.equal(estimateContextFromParams("7B"), 16384); + }); + + it("estimates 32768 for 13B models", () => { + assert.equal(estimateContextFromParams("13B"), 32768); + }); + + it("estimates 65536 for 34B models", () => { + assert.equal(estimateContextFromParams("34B"), 65536); + }); + + it("estimates 131072 for 70B+ models", () => { + assert.equal(estimateContextFromParams("70B"), 131072); + }); + + it("handles decimal sizes", () => { + assert.equal(estimateContextFromParams("7.5B"), 16384); + }); + + it("handles M (millions)", () => { + assert.equal(estimateContextFromParams("500M"), 8192); + }); + + it("returns 8192 for unparseable input", () => { + assert.equal(estimateContextFromParams("unknown"), 8192); + }); + + it("returns 8192 for empty string", () => { + assert.equal(estimateContextFromParams(""), 8192); + }); +}); + +// ─── humanizeModelName ─────────────────────────────────────────────────────── + +describe("humanizeModelName", () => { + it("capitalizes and adds tag", () => { + assert.equal(humanizeModelName("llama3.1:8b"), "Llama 3.1 8B"); + }); + + it("handles latest tag", () => { + assert.equal(humanizeModelName("llama3.1:latest"), "Llama 3.1"); + }); + + it("handles no tag", () => { + assert.equal(humanizeModelName("llama3.1"), "Llama 3.1"); + }); + + it("handles hyphenated names", () => { + const result = humanizeModelName("deepseek-r1:8b"); + assert.ok(result.includes("8B")); + }); +}); + +// ─── formatModelSize ───────────────────────────────────────────────────────── + +describe("formatModelSize", () => { + it("formats GB", () => { + assert.equal(formatModelSize(4_700_000_000), "4.7 GB"); + }); + + it("formats MB", () => { + assert.equal(formatModelSize(500_000_000), "500.0 MB"); + }); + + it("formats KB", () => { + assert.equal(formatModelSize(500_000), "500 KB"); + }); +}); diff --git a/src/resources/extensions/ollama/tests/ollama-client.test.ts 
new file mode 100644
index 000000000..0deae397a
--- /dev/null
+++ b/src/resources/extensions/ollama/tests/ollama-client.test.ts
@@ -0,0 +1,38 @@
+// GSD2 — Tests for Ollama HTTP client
+import { describe, it, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import { getOllamaHost } from "../ollama-client.js";
+
+// ─── getOllamaHost ──────────────────────────────────────────────────────────
+
+describe("getOllamaHost", () => {
+  const originalHost = process.env.OLLAMA_HOST;
+
+  afterEach(() => {
+    if (originalHost === undefined) {
+      delete process.env.OLLAMA_HOST;
+    } else {
+      process.env.OLLAMA_HOST = originalHost;
+    }
+  });
+
+  it("returns default when OLLAMA_HOST is not set", () => {
+    delete process.env.OLLAMA_HOST;
+    assert.equal(getOllamaHost(), "http://localhost:11434");
+  });
+
+  it("returns OLLAMA_HOST when set with scheme", () => {
+    process.env.OLLAMA_HOST = "http://myhost:12345";
+    assert.equal(getOllamaHost(), "http://myhost:12345");
+  });
+
+  it("adds http:// when OLLAMA_HOST has no scheme", () => {
+    process.env.OLLAMA_HOST = "myhost:12345";
+    assert.equal(getOllamaHost(), "http://myhost:12345");
+  });
+
+  it("preserves https:// scheme", () => {
+    process.env.OLLAMA_HOST = "https://secure-ollama.example.com";
+    assert.equal(getOllamaHost(), "https://secure-ollama.example.com");
+  });
+});
diff --git a/src/resources/extensions/ollama/tests/ollama-discovery.test.ts b/src/resources/extensions/ollama/tests/ollama-discovery.test.ts
new file mode 100644
index 000000000..b69cf84e1
--- /dev/null
+++ b/src/resources/extensions/ollama/tests/ollama-discovery.test.ts
@@ -0,0 +1,28 @@
+// GSD2 — Tests for Ollama model discovery and enrichment
+import { describe, it, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import { getOllamaOpenAIBaseUrl } from "../ollama-discovery.js";
+
+// ─── getOllamaOpenAIBaseUrl ─────────────────────────────────────────────────
+
+describe("getOllamaOpenAIBaseUrl", () => {
+  const originalHost = process.env.OLLAMA_HOST;
+
+  afterEach(() => {
+    if (originalHost === undefined) {
+      delete process.env.OLLAMA_HOST;
+    } else {
+      process.env.OLLAMA_HOST = originalHost;
+    }
+  });
+
+  it("returns default OpenAI-compat URL", () => {
+    delete process.env.OLLAMA_HOST;
+    assert.equal(getOllamaOpenAIBaseUrl(), "http://localhost:11434/v1");
+  });
+
+  it("appends /v1 to custom OLLAMA_HOST", () => {
+    process.env.OLLAMA_HOST = "http://remote:9999";
+    assert.equal(getOllamaOpenAIBaseUrl(), "http://remote:9999/v1");
+  });
+});
diff --git a/src/resources/extensions/ollama/types.ts b/src/resources/extensions/ollama/types.ts
new file mode 100644
index 000000000..5f2c88705
--- /dev/null
+++ b/src/resources/extensions/ollama/types.ts
@@ -0,0 +1,130 @@
+// GSD2 — Ollama API response types
+
+/**
+ * Type definitions for the Ollama REST API.
+ * Reference: https://github.com/ollama/ollama/blob/main/docs/api.md
+ */
+
+// ─── /api/tags ──────────────────────────────────────────────────────────────
+
+export interface OllamaModelDetails {
+  parent_model: string;
+  format: string;
+  family: string;
+  families: string[] | null;
+  parameter_size: string;
+  quantization_level: string;
+}
+
+export interface OllamaModelInfo {
+  name: string;
+  model: string;
+  modified_at: string;
+  size: number;
+  digest: string;
+  details: OllamaModelDetails;
+}
+
+export interface OllamaTagsResponse {
+  models: OllamaModelInfo[];
+}
+
+// ─── /api/show ──────────────────────────────────────────────────────────────
+
+export interface OllamaShowResponse {
+  modelfile: string;
+  parameters: string;
+  template: string;
+  details: OllamaModelDetails;
+  model_info: Record<string, unknown>;
+}
+
+// ─── /api/ps ────────────────────────────────────────────────────────────────
+
+export interface OllamaRunningModel {
+  name: string;
+  model: string;
+  size: number;
+  digest: string;
+  details: OllamaModelDetails;
+  expires_at: string;
+  size_vram: number;
+}
+
+export interface OllamaPsResponse {
+  models: OllamaRunningModel[];
+}
+
+// ─── /api/pull ──────────────────────────────────────────────────────────────
+
+export interface OllamaPullProgress {
+  status: string;
+  digest?: string;
+  total?: number;
+  completed?: number;
+}
+
+// ─── /api/version ───────────────────────────────────────────────────────────
+
+export interface OllamaVersionResponse {
+  version: string;
+}
+
+// ─── /api/chat ──────────────────────────────────────────────────────────────
+
+export interface OllamaChatMessage {
+  role: "system" | "user" | "assistant" | "tool";
+  content: string;
+  images?: string[];
+  tool_calls?: OllamaToolCall[];
+}
+
+export interface OllamaToolCall {
+  function: {
+    name: string;
+    arguments: Record<string, unknown>;
+  };
+}
+
+export interface OllamaTool {
+  type: "function";
+  function: {
+    name: string;
+    description: string;
+    parameters: {
+      type: "object";
+      required?: string[];
+      properties: Record<string, unknown>;
+    };
+  };
+}
+
+export interface OllamaChatRequest {
+  model: string;
+  messages: OllamaChatMessage[];
+  stream?: boolean;
+  tools?: OllamaTool[];
+  options?: {
+    num_ctx?: number;
+    num_predict?: number;
+    temperature?: number;
+    top_p?: number;
+    top_k?: number;
+    stop?: string[];
+  };
+  keep_alive?: string;
+}
+
+export interface OllamaChatResponse {
+  model: string;
+  created_at: string;
+  message: OllamaChatMessage;
+  done: boolean;
+  done_reason?: string;
+  total_duration?: number;
+  load_duration?: number;
+  prompt_eval_count?: number;
+  prompt_eval_duration?: number;
+  eval_count?: number;
+  eval_duration?: number;
+}
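
Not part of the diff above: the sketch below shows how the `/api/chat` types in types.ts are meant to be consumed. Ollama streams newline-delimited JSON from `/api/chat`, one `OllamaChatResponse` object per line, with `done: true` on the final chunk. The sketch assumes only the global `fetch` of Node 18+; the `streamChat` helper name is illustrative, and the extension's actual streaming provider (`ollama-provider.ts`, not shown in this hunk) may be structured differently.

```ts
import type { OllamaChatRequest, OllamaChatResponse } from "./types.js";

// Illustrative only: yield each /api/chat NDJSON chunk as a parsed
// OllamaChatResponse. The final chunk has done === true and carries
// the eval counts and durations.
async function* streamChat(
  host: string,
  req: OllamaChatRequest,
): AsyncGenerator<OllamaChatResponse> {
  const res = await fetch(`${host}/api/chat`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ ...req, stream: true }),
  });
  if (!res.ok || !res.body) {
    throw new Error(`Ollama /api/chat failed: HTTP ${res.status}`);
  }

  const decoder = new TextDecoder();
  let buffer = "";
  for await (const chunk of res.body as AsyncIterable<Uint8Array>) {
    buffer += decoder.decode(chunk, { stream: true });
    // A network chunk can end mid-object, so parse per line, not per chunk.
    let newline: number;
    while ((newline = buffer.indexOf("\n")) !== -1) {
      const line = buffer.slice(0, newline).trim();
      buffer = buffer.slice(newline + 1);
      if (line) yield JSON.parse(line) as OllamaChatResponse;
    }
  }
}
```

Buffering until each newline is the important detail: naive per-chunk `JSON.parse` breaks as soon as a response object straddles two network reads.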