From 04ebe3f0a04b69a6b9aa80ed671deb1181784a45 Mon Sep 17 00:00:00 2001 From: Jeremy McSpadden Date: Wed, 1 Apr 2026 09:37:31 -0500 Subject: [PATCH] feat(extensions): add Ollama extension for first-class local LLM support (#3371) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-contained extension at src/resources/extensions/ollama/ that auto-detects a running Ollama instance, discovers locally pulled models, and registers them as a first-class provider with zero configuration. Features: - Auto-discovery of local models via /api/tags on session_start - Capability detection (vision, reasoning, context window) for 40+ model families - /ollama slash command with status, list, pull, remove, ps subcommands - ollama_manage LLM-callable tool for agent-driven model operations - Onboarding flow with auto-detect (no API key required) - Non-blocking async probe — doesn't delay TUI paint - Respects OLLAMA_HOST env var for non-default endpoints Core changes (minimal): - Add "ollama" to KnownProvider in pi-ai types - Add "ollama" key resolution in env-api-keys.ts - Add "ollama" default model in model-resolver.ts - Add "Ollama (Local)" to onboarding wizard with probe flow --- .plans/ollama-native-provider.md | 241 +++++++++++++++++ packages/pi-ai/src/env-api-keys.ts | 1 + packages/pi-ai/src/types.ts | 1 + .../src/core/model-resolver.ts | 1 + src/onboarding.ts | 53 ++++ src/resources/extensions/gsd/auto-worktree.ts | 10 + src/resources/extensions/gsd/state.ts | 21 +- src/resources/extensions/ollama/index.ts | 130 +++++++++ .../extensions/ollama/model-capabilities.ts | 145 ++++++++++ .../extensions/ollama/ollama-client.ts | 196 ++++++++++++++ .../extensions/ollama/ollama-commands.ts | 248 ++++++++++++++++++ .../extensions/ollama/ollama-discovery.ts | 106 ++++++++ .../extensions/ollama/ollama-tool.ts | 218 +++++++++++++++ .../ollama/tests/model-capabilities.test.ts | 162 ++++++++++++ .../ollama/tests/ollama-client.test.ts | 38 +++ .../ollama/tests/ollama-discovery.test.ts | 28 ++ src/resources/extensions/ollama/types.ts | 130 +++++++++ 17 files changed, 1727 insertions(+), 2 deletions(-) create mode 100644 .plans/ollama-native-provider.md create mode 100644 src/resources/extensions/ollama/index.ts create mode 100644 src/resources/extensions/ollama/model-capabilities.ts create mode 100644 src/resources/extensions/ollama/ollama-client.ts create mode 100644 src/resources/extensions/ollama/ollama-commands.ts create mode 100644 src/resources/extensions/ollama/ollama-discovery.ts create mode 100644 src/resources/extensions/ollama/ollama-tool.ts create mode 100644 src/resources/extensions/ollama/tests/model-capabilities.test.ts create mode 100644 src/resources/extensions/ollama/tests/ollama-client.test.ts create mode 100644 src/resources/extensions/ollama/tests/ollama-discovery.test.ts create mode 100644 src/resources/extensions/ollama/types.ts diff --git a/.plans/ollama-native-provider.md b/.plans/ollama-native-provider.md new file mode 100644 index 000000000..312743c95 --- /dev/null +++ b/.plans/ollama-native-provider.md @@ -0,0 +1,241 @@ +# Ollama Extension — First-Class Local LLM Support + +## Status: DRAFT — Awaiting approval + +## Problem + +Ollama support in GSD2 currently requires manual `models.json` configuration. Users must: +1. Know the OpenAI-compatibility endpoint (`localhost:11434/v1`) +2. Manually list every model they want to use +3. Set compat flags (`supportsDeveloperRole: false`, etc.) +4. 
Use a dummy API key + +There's an `ollama-cloud` provider for hosted Ollama, and a discovery adapter that can list models, but no first-class **local Ollama** extension that "just works." + +## Goal + +Make Ollama the easiest way to use GSD2 — zero config when Ollama is running locally. All Ollama functionality lives in a single extension: `src/resources/extensions/ollama/`. + +## Architecture + +Everything is a self-contained extension under `src/resources/extensions/ollama/`. The extension: +- Auto-detects Ollama on startup via health check +- Discovers and registers local models with the model registry +- Provides native Ollama API streaming (not OpenAI shim) +- Exposes `/ollama` slash commands for model management +- Registers an LLM-callable tool for model pull/status + +Minimal core changes — only `KnownProvider` and `KnownApi` type additions in `pi-ai`, and `env-api-keys.ts` for key resolution. Everything else is in the extension. + +## File Structure + +``` +src/resources/extensions/ollama/ +├── index.ts # Extension entry — wires everything on session_start +├── ollama-client.ts # HTTP client for Ollama REST API (/api/*) +├── ollama-discovery.ts # Model discovery + capability detection +├── ollama-provider.ts # Native /api/chat streaming provider (registers with pi-ai) +├── ollama-commands.ts # /ollama slash commands (status, pull, list, remove, ps) +├── ollama-tool.ts # LLM-callable tool for model management +├── model-capabilities.ts # Known model capability table (context window, vision, reasoning) +└── types.ts # Shared types for Ollama API responses +``` + +## Scope + +### Phase 1: Auto-Discovery + OpenAI-Compat Routing + +**What:** Extension that auto-detects Ollama, discovers models, registers them using the existing `openai-completions` API provider. Zero config needed. + +**Extension files:** +- `ollama/index.ts` — Main entry. On `session_start`: + 1. Probe `localhost:11434` (or `OLLAMA_HOST`) with 1.5s timeout + 2. If reachable, discover models via `/api/tags` + 3. Register discovered models with `ctx.modelRegistry` using correct defaults + 4. 
Show status widget if Ollama is detected +- `ollama/ollama-client.ts` — Low-level HTTP client: + - `isRunning()` — `GET /` health check + - `getVersion()` — `GET /api/version` + - `listModels()` — `GET /api/tags` + - `showModel(name)` — `POST /api/show` (details, template, parameters, size) + - `getRunningModels()` — `GET /api/ps` (loaded models, VRAM usage) + - `pullModel(name, onProgress)` — `POST /api/pull` (streaming progress) + - `deleteModel(name)` — `DELETE /api/delete` + - `copyModel(source, dest)` — `POST /api/copy` + - Respects `OLLAMA_HOST` env var for non-default endpoints +- `ollama/ollama-discovery.ts` — Enhanced model discovery: + - Calls `/api/tags` to get model list + - Calls `/api/show` per model (batch, cached) to get: + - `details.parameter_size` → estimate context window + - `details.families` → detect vision (clip), reasoning (deepseek-r1) + - `modelfile` → extract default parameters + - Returns enriched `DiscoveredModel[]` with proper capabilities +- `ollama/model-capabilities.ts` — Known model lookup table: + - Maps well-known model families to capabilities + - e.g., `llama3.1` → `{ contextWindow: 131072, input: ["text"] }` + - e.g., `llava` → `{ contextWindow: 4096, input: ["text", "image"] }` + - e.g., `deepseek-r1` → `{ reasoning: true, contextWindow: 131072 }` + - e.g., `qwen2.5-coder` → `{ contextWindow: 131072, input: ["text"] }` + - Fallback: estimate from parameter count if not in table +- `ollama/types.ts` — Ollama API response types + +**Core changes (minimal):** +- `packages/pi-ai/src/types.ts` — Add `"ollama"` to `KnownProvider` +- `packages/pi-ai/src/env-api-keys.ts` — Add `"ollama"` key resolution (returns `"ollama"` placeholder — no real key needed) +- `src/onboarding.ts` — Add `"ollama"` to provider selection list +- `src/wizard.ts` — Add `ollama` entry (no key required) + +**Model registration details:** +Each discovered model registers as: +```typescript +{ + id: "llama3.1:8b", // from /api/tags + name: "Llama 3.1 8B", // humanized + api: "openai-completions", // uses existing provider + provider: "ollama", + baseUrl: "http://localhost:11434/v1", + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + reasoning: false, // from capabilities table + input: ["text"], // from capabilities table + contextWindow: 131072, // from capabilities table or /api/show + maxTokens: 16384, // conservative default + compat: { + supportsDeveloperRole: false, + supportsReasoningEffort: false, + supportsUsageInStreaming: false, + maxTokensField: "max_tokens", + }, +} +``` + +**Behavior:** +- `gsd --list-models` shows all locally-pulled Ollama models automatically +- `/model ollama/llama3.1:8b` works without any config file +- If Ollama isn't running, extension is silent — no errors, no models listed +- `models.json` overrides still work (user config wins over auto-discovery) + +### Phase 2: Native Ollama API Provider (`/api/chat`) + +**What:** A dedicated streaming provider that talks Ollama's native protocol instead of the OpenAI compatibility shim. 
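+
+To make the protocol concrete, a minimal NDJSON read loop over `/api/chat` looks roughly like this — the endpoint, `stream: true` flag, and per-line response shape follow the Ollama API docs; the helper itself is an illustrative sketch, not the provider implementation:
+
+```typescript
+// Sketch: stream assistant text from Ollama's native chat API.
+// Assumes a reachable instance at localhost:11434; error handling elided.
+async function* streamChat(model: string, messages: { role: string; content: string }[]) {
+  const res = await fetch("http://localhost:11434/api/chat", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ model, messages, stream: true }),
+  });
+  const reader = res.body!.getReader();
+  const decoder = new TextDecoder();
+  let buffer = "";
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+    buffer += decoder.decode(value, { stream: true });
+    const lines = buffer.split("\n");
+    buffer = lines.pop() ?? "";
+    for (const line of lines) {
+      if (!line.trim()) continue;
+      const chunk = JSON.parse(line); // one chat chunk per NDJSON line
+      if (chunk.message?.content) yield chunk.message.content;
+      if (chunk.done) return; // final chunk carries eval_count / eval_duration
+    }
+  }
+}
+```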
+
+**Extension files:**
+- `ollama/ollama-provider.ts` — Native `/api/chat` streaming:
+  - Registers `"ollama-chat"` API with `registerApiProvider()`
+  - Implements `stream()` and `streamSimple()`:
+    - Maps GSD `Context` → Ollama messages format
+    - Maps GSD `Tool[]` → Ollama tool format
+    - Streams NDJSON responses, maps back to `AssistantMessage` events
+    - Extracts `<think>` blocks for reasoning models (deepseek-r1, qwq)
+  - Ollama-specific options:
+    - `keep_alive` — control model memory retention (default: "5m")
+    - `num_ctx` — pass through model's context window
+    - `num_predict` — max output tokens
+    - Temperature, top_p, top_k
+  - Response metadata:
+    - `eval_count` / `eval_duration` → tokens/sec in usage stats
+    - `total_duration`, `load_duration` → performance visibility
+  - Vision support: converts image content to base64 for multimodal models
+
+**Core changes:**
+- `packages/pi-ai/src/types.ts` — Add `"ollama-chat"` to `KnownApi`
+
+**Phase 1 models switch to `api: "ollama-chat"` by default.** Users can force OpenAI-compat via `models.json` override if needed.
+
+**Why native over OpenAI-compat:**
+- Full `keep_alive` / `num_ctx` control
+- Better error messages (Ollama-native vs generic OpenAI)
+- More reliable tool calling on Ollama's native format
+- Performance metrics in response (tokens/sec)
+- Foundation for model management commands
+
+### Phase 3: Local LLM Management UX
+
+**What:** `/ollama` slash commands and an LLM tool for model management.
+
+**Extension files:**
+- `ollama/ollama-commands.ts` — Slash commands registered via `pi.registerCommand()`:
+  - `/ollama` — Status overview:
+    ```
+    Ollama v0.5.7 — running (localhost:11434)
+
+    Loaded:
+      llama3.1:8b    4.7 GB VRAM    idle 3m
+
+    Available:
+      llama3.1:8b        (4.7 GB)
+      qwen2.5-coder:7b   (4.4 GB)
+      deepseek-r1:8b     (4.9 GB)
+    ```
+  - `/ollama pull <model>` — Pull with streaming progress via `ctx.ui.setWidget()`
+  - `/ollama list` — List all local models with sizes and families
+  - `/ollama remove <model>` — Delete a model (with confirmation)
+  - `/ollama ps` — Running models + VRAM usage
+- `ollama/ollama-tool.ts` — LLM-callable tool registered via `pi.registerTool()`:
+  - `ollama_manage` tool — lets the agent pull/list/check models
+  - Parameters: `{ action: "list" | "pull" | "status" | "ps", model?: string }`
+  - Use case: agent detects it needs a model, pulls it automatically
+
+**UX Flow:**
+```
+$ gsd
+> /ollama
+Ollama v0.5.7 — running (localhost:11434)
+Loaded:
+  llama3.1:8b — 4.7 GB VRAM, idle 3m
+Available:
+  llama3.1:8b        (4.7 GB)
+  qwen2.5-coder:7b   (4.4 GB)
+  deepseek-r1:8b     (4.9 GB)
+
+> /ollama pull codestral:22b
+Pulling codestral:22b...
+████████████████████████████░░░░ 78% (14.2 GB / 18.1 GB)
+✓ codestral:22b ready
+
+> /model ollama/codestral:22b
+Switched to codestral:22b (local, Ollama)
+```
+
+## Implementation Order
+
+1. **Phase 1** — Auto-discovery with OpenAI-compat routing. Biggest user impact, smallest risk (probe sketch below).
+2. **Phase 3** — Management UX (`/ollama` commands). Valuable even before native API.
+3. **Phase 2** — Native `/api/chat` provider. Optimization over OpenAI-compat; do last.
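+
+The probe in item 1 must stay off the TUI's critical path (see the risks table below). Schematically — using the extension hooks named in this plan, with everything else illustrative:
+
+```typescript
+// Sketch: non-blocking Ollama probe on session_start.
+// isRunning() applies the 1.5s timeout; discovery only runs when it succeeds.
+pi.on("session_start", (_event, _ctx) => {
+  void (async () => {
+    if (!(await isRunning())) return;      // Ollama absent — stay silent
+    const models = await discoverModels(); // GET /api/tags, enriched
+    pi.registerProvider("ollama", { /* zero-cost models, openai-compat */ });
+  })(); // fire-and-forget: the TUI paints immediately
+});
+```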
+ +## Core Changes Summary (minimal) + +| File | Change | +|------|--------| +| `packages/pi-ai/src/types.ts` | Add `"ollama"` to `KnownProvider`, `"ollama-chat"` to `KnownApi` (Phase 2) | +| `packages/pi-ai/src/env-api-keys.ts` | Add `"ollama"` → always returns `"ollama"` placeholder | +| `src/onboarding.ts` | Add `"ollama"` to provider picker | +| `src/wizard.ts` | Add `"ollama"` key mapping (no key required) | + +Everything else lives in `src/resources/extensions/ollama/`. + +## Risks & Mitigations + +| Risk | Mitigation | +|------|------------| +| Ollama not running — startup probe latency | 1.5s timeout; cache result; probe async so it doesn't block TUI paint | +| Model capabilities unknown | Known-model table + `/api/show` fallback + parameter_size estimation | +| Tool calling unreliable on small models | Detect param count; warn on <7B models | +| Ollama API changes between versions | Version detect via `/api/version`; stable endpoints only | +| Conflicts with `models.json` Ollama config | User config always wins; auto-discovered models merge beneath manual config | +| Extension disabled — no impact on core | Extension is additive; disabling removes all Ollama features cleanly | + +## Testing Strategy + +- Unit tests: `ollama-client.ts` with mocked fetch responses +- Unit tests: `ollama-discovery.ts` model capability parsing +- Unit tests: `ollama-provider.ts` message format mapping + NDJSON stream parsing +- Unit tests: `model-capabilities.ts` known model lookups +- Integration test: mock HTTP server simulating Ollama `/api/tags`, `/api/chat`, `/api/pull` +- Manual test: real Ollama instance with llama3.1, qwen2.5-coder, deepseek-r1 + +## Open Questions + +1. **Startup probe** — Probe Ollama on `session_start` (adds ~1.5s if not running) or lazy on first `/model`? **Recommendation: async probe on session_start (non-blocking), eager if `OLLAMA_HOST` is set.** +2. **Auto-start** — Try to launch Ollama if installed but not running? **Recommendation: no — too invasive. Show helpful message in `/ollama` status.** +3. **Vision support** — Support multimodal models (llava, etc.) in Phase 2 native API? **Recommendation: yes, detected via capabilities table.** +4. **Model refresh** — How often to re-probe Ollama for new models? 
**Recommendation: on `/ollama list`, on `/model` command, and every 5 min (existing TTL).** diff --git a/packages/pi-ai/src/env-api-keys.ts b/packages/pi-ai/src/env-api-keys.ts index b6577d99d..1036c4b28 100644 --- a/packages/pi-ai/src/env-api-keys.ts +++ b/packages/pi-ai/src/env-api-keys.ts @@ -137,6 +137,7 @@ export function getEnvApiKey(provider: any): string | undefined { "opencode-go": "OPENCODE_API_KEY", "kimi-coding": "KIMI_API_KEY", "alibaba-coding-plan": "ALIBABA_API_KEY", + ollama: "OLLAMA_API_KEY", "ollama-cloud": "OLLAMA_API_KEY", "custom-openai": "CUSTOM_OPENAI_API_KEY", }; diff --git a/packages/pi-ai/src/types.ts b/packages/pi-ai/src/types.ts index ea3e1491a..42a6b3478 100644 --- a/packages/pi-ai/src/types.ts +++ b/packages/pi-ai/src/types.ts @@ -43,6 +43,7 @@ export type KnownProvider = | "opencode-go" | "kimi-coding" | "alibaba-coding-plan" + | "ollama" | "ollama-cloud"; export type Provider = KnownProvider | string; diff --git a/packages/pi-coding-agent/src/core/model-resolver.ts b/packages/pi-coding-agent/src/core/model-resolver.ts index bfe6ee86f..6d07b940b 100644 --- a/packages/pi-coding-agent/src/core/model-resolver.ts +++ b/packages/pi-coding-agent/src/core/model-resolver.ts @@ -37,6 +37,7 @@ const defaultModelPerProvider: Record = { "opencode-go": "kimi-k2.5", "kimi-coding": "kimi-k2-thinking", "alibaba-coding-plan": "qwen3.5-plus", + ollama: "llama3.1:8b", "ollama-cloud": "qwen3:32b", }; diff --git a/src/onboarding.ts b/src/onboarding.ts index 93e39d0f5..6b21d94d6 100644 --- a/src/onboarding.ts +++ b/src/onboarding.ts @@ -74,6 +74,7 @@ const LLM_PROVIDER_IDS = [ 'xai', 'openrouter', 'mistral', + 'ollama', 'ollama-cloud', 'custom-openai', ] @@ -90,6 +91,7 @@ const OTHER_PROVIDERS = [ { value: 'xai', label: 'xAI (Grok)' }, { value: 'openrouter', label: 'OpenRouter' }, { value: 'mistral', label: 'Mistral' }, + { value: 'ollama', label: 'Ollama (Local)' }, { value: 'ollama-cloud', label: 'Ollama Cloud' }, { value: 'custom-openai', label: 'Custom (OpenAI-compatible)' }, ] @@ -335,6 +337,9 @@ async function runLlmStep(p: ClackModule, pc: PicoModule, authStorage: AuthStora if (provider === 'custom-openai') { return await runCustomOpenAIFlow(p, pc, authStorage) } + if (provider === 'ollama') { + return await runOllamaLocalFlow(p, pc, authStorage) + } const label = provider === 'anthropic' ? 'Anthropic' : provider === 'openai' ? 'OpenAI' : OTHER_PROVIDERS.find(op => op.value === provider)?.label ?? 
      String(provider)
@@ -444,6 +449,54 @@ async function runApiKeyFlow(
   return true
 }
 
+// ─── Ollama Local Flow ───────────────────────────────────────────────────────
+
+async function runOllamaLocalFlow(
+  p: ClackModule,
+  pc: PicoModule,
+  authStorage: AuthStorage,
+): Promise<boolean> {
+  const host = process.env.OLLAMA_HOST || 'http://localhost:11434'
+
+  const s = p.spinner()
+  s.start(`Checking Ollama at ${host}...`)
+
+  try {
+    const controller = new AbortController()
+    const timeout = setTimeout(() => controller.abort(), 3000)
+    const response = await fetch(host, { signal: controller.signal })
+    clearTimeout(timeout)
+
+    if (response.ok) {
+      s.stop(`Ollama is running at ${pc.green(host)}`)
+      // Store a placeholder so the provider is recognized as authenticated
+      authStorage.set('ollama', { type: 'api_key', key: 'ollama' })
+      p.log.success(`${pc.green('Ollama (Local)')} configured — no API key needed`)
+      p.log.info(pc.dim('Models are discovered automatically from your local Ollama instance.'))
+      return true
+    } else {
+      s.stop('Ollama check failed')
+      p.log.warn(`Ollama responded with status ${response.status} at ${host}`)
+    }
+  } catch {
+    s.stop('Ollama not detected')
+    p.log.warn(`Could not reach Ollama at ${host}`)
+    p.log.info(pc.dim('Install Ollama from https://ollama.com and run "ollama serve"'))
+    p.log.info(pc.dim('Set OLLAMA_HOST if using a non-default address.'))
+  }
+
+  // Even if not reachable now, save the config — the extension will detect it at runtime
+  const proceed = await p.confirm({
+    message: 'Save Ollama as your provider anyway? (it will auto-detect when running)',
+  })
+
+  if (p.isCancel(proceed) || !proceed) return false
+
+  authStorage.set('ollama', { type: 'api_key', key: 'ollama' })
+  p.log.success(`${pc.green('Ollama (Local)')} saved — models will appear when Ollama is running`)
+  return true
+}
+
 // ─── Custom OpenAI-compatible Flow ────────────────────────────────────────────
 
 async function runCustomOpenAIFlow(
diff --git a/src/resources/extensions/gsd/auto-worktree.ts b/src/resources/extensions/gsd/auto-worktree.ts
index 27a70af84..256ad11a3 100644
--- a/src/resources/extensions/gsd/auto-worktree.ts
+++ b/src/resources/extensions/gsd/auto-worktree.ts
@@ -1566,6 +1566,16 @@ export function mergeMilestoneToMain(
     // Non-fatal — proceed with merge; untracked files may block it
   }
 
+  // 7c. Clean stale MERGE_HEAD before the squash merge (#2912).
+  // The native (libgit2) merge path or a prior interrupted merge may leave
+  // MERGE_HEAD in the git dir. `git merge --squash` refuses to run when
+  // MERGE_HEAD exists, so remove it preemptively.
+  try {
+    const gitDirPre = resolveGitDir(originalBasePath_);
+    const mergeHeadPre = join(gitDirPre, "MERGE_HEAD");
+    if (existsSync(mergeHeadPre)) unlinkSync(mergeHeadPre);
+  } catch { /* best-effort */ }
+
   // 8. Squash merge — auto-resolve .gsd/ state file conflicts (#530)
   const mergeResult = nativeMergeSquash(originalBasePath_, milestoneBranch);
 
diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts
index 9e1b3f311..628ea5907 100644
--- a/src/resources/extensions/gsd/state.ts
+++ b/src/resources/extensions/gsd/state.ts
@@ -230,14 +230,31 @@ export async function deriveState(basePath: string): Promise {
   // Dual-path: try DB-backed derivation first when hierarchy tables are populated
   if (isDbAvailable()) {
-    const dbMilestones = getAllMilestones();
+    let dbMilestones = getAllMilestones();
+
+    // Disk→DB reconciliation when DB is empty but disk has milestones (#2631).
+    // deriveStateFromDb() does its own reconciliation, but deriveState() skips
+    // it entirely when the DB is empty. Sync here so the DB path is used when
+    // disk milestones exist but haven't been migrated yet.
+    if (dbMilestones.length === 0) {
+      const diskIds = findMilestoneIds(basePath);
+      let synced = false;
+      for (const diskId of diskIds) {
+        if (!isGhostMilestone(basePath, diskId)) {
+          insertMilestone({ id: diskId, status: 'active' });
+          synced = true;
+        }
+      }
+      if (synced) dbMilestones = getAllMilestones();
+    }
+
     if (dbMilestones.length > 0) {
       const stopDbTimer = debugTime("derive-state-db");
       result = await deriveStateFromDb(basePath);
       stopDbTimer({ phase: result.phase, milestone: result.activeMilestone?.id });
       _telemetry.dbDeriveCount++;
     } else {
-      // DB open but empty hierarchy tables — pre-migration project, use filesystem
+      // DB open but no milestones on disk either — use filesystem path
       result = await _deriveStateImpl(basePath);
       _telemetry.markdownDeriveCount++;
     }
diff --git a/src/resources/extensions/ollama/index.ts b/src/resources/extensions/ollama/index.ts
new file mode 100644
index 000000000..3117fdd54
--- /dev/null
+++ b/src/resources/extensions/ollama/index.ts
@@ -0,0 +1,130 @@
+// GSD2 — Ollama Extension: First-class local LLM support
+/**
+ * Ollama Extension
+ *
+ * Auto-detects a running Ollama instance, discovers locally pulled models,
+ * and registers them as a first-class provider. No configuration required —
+ * if Ollama is running, models appear automatically.
+ *
+ * Features:
+ * - Auto-discovery of local models via /api/tags
+ * - Capability detection (vision, reasoning, context window)
+ * - /ollama slash commands for model management
+ * - ollama_manage tool for LLM-driven model operations
+ * - Zero-cost model registration (local inference)
+ *
+ * Respects OLLAMA_HOST env var for non-default endpoints.
+ */
+
+import { importExtensionModule, type ExtensionAPI } from "@gsd/pi-coding-agent";
+import type { OpenAICompletionsCompat } from "@gsd/pi-ai";
+import * as client from "./ollama-client.js";
+import { discoverModels, getOllamaOpenAIBaseUrl } from "./ollama-discovery.js";
+import { registerOllamaCommands } from "./ollama-commands.js";
+
+/** Default compat settings for Ollama models via OpenAI-compat endpoint */
+const OLLAMA_COMPAT: OpenAICompletionsCompat = {
+  supportsDeveloperRole: false,
+  supportsReasoningEffort: false,
+  supportsUsageInStreaming: false,
+  maxTokensField: "max_tokens",
+  supportsStore: false,
+};
+
+let toolsPromise: Promise<void> | null = null;
+
+async function registerOllamaTools(pi: ExtensionAPI): Promise<void> {
+  if (!toolsPromise) {
+    toolsPromise = (async () => {
+      const { registerOllamaTool } = await importExtensionModule<
+        typeof import("./ollama-tool.js")
+      >(import.meta.url, "./ollama-tool.js");
+      registerOllamaTool(pi);
+    })().catch((error) => {
+      toolsPromise = null;
+      throw error;
+    });
+  }
+  return toolsPromise;
+}
+
+/** Track whether we've registered models so we can clean up on shutdown */
+let providerRegistered = false;
+
+/**
+ * Probe Ollama and register discovered models.
+ * Safe to call multiple times — re-discovers and re-registers.
+ */ +async function probeAndRegister(pi: ExtensionAPI): Promise { + const running = await client.isRunning(); + if (!running) { + if (providerRegistered) { + pi.unregisterProvider("ollama"); + providerRegistered = false; + } + return false; + } + + const models = await discoverModels(); + if (models.length === 0) return true; // Running but no models pulled + + const baseUrl = getOllamaOpenAIBaseUrl(); + + pi.registerProvider("ollama", { + authMode: "none", + baseUrl, + api: "openai-completions", + isReady: () => true, + models: models.map((m) => ({ + id: m.id, + name: m.name, + reasoning: m.reasoning, + input: m.input, + cost: m.cost, + contextWindow: m.contextWindow, + maxTokens: m.maxTokens, + compat: OLLAMA_COMPAT, + })), + }); + + providerRegistered = true; + return true; +} + +export default function ollama(pi: ExtensionAPI) { + // Register slash commands immediately (they check Ollama availability themselves) + registerOllamaCommands(pi); + + pi.on("session_start", async (_event, ctx) => { + // Register tool (deferred to avoid blocking startup) + if (ctx.hasUI) { + void registerOllamaTools(pi).catch((error) => { + ctx.ui.notify( + `Ollama tool failed to load: ${error instanceof Error ? error.message : String(error)}`, + "warning", + ); + }); + } else { + await registerOllamaTools(pi); + } + + // Async probe — don't block startup + probeAndRegister(pi) + .then((found) => { + if (found && ctx.hasUI) { + ctx.ui.setStatus("ollama", "Ollama"); + } + }) + .catch(() => { + // Silently ignore probe failures + }); + }); + + pi.on("session_shutdown", async () => { + if (providerRegistered) { + pi.unregisterProvider("ollama"); + providerRegistered = false; + } + toolsPromise = null; + }); +} diff --git a/src/resources/extensions/ollama/model-capabilities.ts b/src/resources/extensions/ollama/model-capabilities.ts new file mode 100644 index 000000000..8209946c3 --- /dev/null +++ b/src/resources/extensions/ollama/model-capabilities.ts @@ -0,0 +1,145 @@ +// GSD2 — Known model capability table for Ollama models + +/** + * Maps well-known Ollama model families to their capabilities. + * Used to enrich auto-discovered models with accurate context windows, + * vision support, and reasoning detection. + * + * Fallback: estimate from parameter count if model isn't in the table. + */ + +export interface ModelCapability { + contextWindow?: number; + maxTokens?: number; + input?: ("text" | "image")[]; + reasoning?: boolean; +} + +/** + * Known model family capabilities. + * Keys are matched as prefixes against the model name (before the colon/tag). + * More specific entries should appear first. 
+ */ +const KNOWN_MODELS: Array<[pattern: string, caps: ModelCapability]> = [ + // ─── Reasoning models ─────────────────────────────────────────────── + ["deepseek-r1", { contextWindow: 131072, reasoning: true }], + ["qwq", { contextWindow: 131072, reasoning: true }], + + // ─── Vision models ────────────────────────────────────────────────── + ["llava", { contextWindow: 4096, input: ["text", "image"] }], + ["bakllava", { contextWindow: 4096, input: ["text", "image"] }], + ["moondream", { contextWindow: 8192, input: ["text", "image"] }], + ["llama3.2-vision", { contextWindow: 131072, input: ["text", "image"] }], + ["minicpm-v", { contextWindow: 4096, input: ["text", "image"] }], + + // ─── Code models ──────────────────────────────────────────────────── + ["codestral", { contextWindow: 262144, maxTokens: 32768 }], + ["qwen2.5-coder", { contextWindow: 131072, maxTokens: 32768 }], + ["deepseek-coder-v2", { contextWindow: 131072, maxTokens: 16384 }], + ["starcoder2", { contextWindow: 16384, maxTokens: 8192 }], + ["codegemma", { contextWindow: 8192, maxTokens: 8192 }], + ["codellama", { contextWindow: 16384, maxTokens: 8192 }], + ["devstral", { contextWindow: 131072, maxTokens: 32768 }], + + // ─── Llama family ─────────────────────────────────────────────────── + ["llama3.3", { contextWindow: 131072, maxTokens: 16384 }], + ["llama3.2", { contextWindow: 131072, maxTokens: 16384 }], + ["llama3.1", { contextWindow: 131072, maxTokens: 16384 }], + ["llama3", { contextWindow: 8192, maxTokens: 8192 }], + ["llama2", { contextWindow: 4096, maxTokens: 4096 }], + + // ─── Qwen family ──────────────────────────────────────────────────── + ["qwen3", { contextWindow: 131072, maxTokens: 32768 }], + ["qwen2.5", { contextWindow: 131072, maxTokens: 32768 }], + ["qwen2", { contextWindow: 131072, maxTokens: 32768 }], + + // ─── Gemma family ─────────────────────────────────────────────────── + ["gemma3", { contextWindow: 131072, maxTokens: 16384 }], + ["gemma2", { contextWindow: 8192, maxTokens: 8192 }], + + // ─── Mistral family ───────────────────────────────────────────────── + ["mistral-large", { contextWindow: 131072, maxTokens: 16384 }], + ["mistral-small", { contextWindow: 131072, maxTokens: 16384 }], + ["mistral-nemo", { contextWindow: 131072, maxTokens: 16384 }], + ["mistral", { contextWindow: 32768, maxTokens: 8192 }], + ["mixtral", { contextWindow: 32768, maxTokens: 8192 }], + + // ─── Phi family ───────────────────────────────────────────────────── + ["phi4", { contextWindow: 16384, maxTokens: 16384 }], + ["phi3.5", { contextWindow: 131072, maxTokens: 16384 }], + ["phi3", { contextWindow: 131072, maxTokens: 4096 }], + + // ─── Command R ────────────────────────────────────────────────────── + ["command-r-plus", { contextWindow: 131072, maxTokens: 16384 }], + ["command-r", { contextWindow: 131072, maxTokens: 16384 }], +]; + +/** + * Look up capabilities for a model by name. + * Matches the longest prefix from the known models table. + */ +export function getModelCapabilities(modelName: string): ModelCapability { + // Strip tag (everything after the colon) for matching + const baseName = modelName.split(":")[0].toLowerCase(); + + for (const [pattern, caps] of KNOWN_MODELS) { + if (baseName === pattern || baseName.startsWith(pattern)) { + return caps; + } + } + + return {}; +} + +/** + * Estimate context window from parameter size string (e.g. "7B", "70B", "1.5B"). + * Used as fallback when model isn't in the known table. 
+ */ +export function estimateContextFromParams(parameterSize: string): number { + const match = parameterSize.match(/([\d.]+)\s*([BbMm])/); + if (!match) return 8192; + + const size = parseFloat(match[1]); + const unit = match[2].toUpperCase(); + + // Convert to billions + const billions = unit === "M" ? size / 1000 : size; + + // Rough heuristics: larger models tend to support larger contexts + if (billions >= 70) return 131072; + if (billions >= 30) return 65536; + if (billions >= 13) return 32768; + if (billions >= 7) return 16384; + return 8192; +} + +/** + * Humanize a model name for display (e.g. "llama3.1:8b" → "Llama 3.1 8B"). + */ +export function humanizeModelName(modelName: string): string { + const [base, tag] = modelName.split(":"); + + // Capitalize first letter, add spaces around version numbers + let name = base + .replace(/([a-z])(\d)/g, "$1 $2") + .replace(/(\d)([a-z])/g, "$1 $2") + .replace(/^./, (c) => c.toUpperCase()); + + // Clean up common patterns + name = name.replace(/\s*-\s*/g, " "); + + if (tag && tag !== "latest") { + name += ` ${tag.toUpperCase()}`; + } + + return name; +} + +/** + * Format byte size for display (e.g. 4700000000 → "4.7 GB"). + */ +export function formatModelSize(bytes: number): string { + if (bytes >= 1e9) return `${(bytes / 1e9).toFixed(1)} GB`; + if (bytes >= 1e6) return `${(bytes / 1e6).toFixed(1)} MB`; + return `${(bytes / 1e3).toFixed(0)} KB`; +} diff --git a/src/resources/extensions/ollama/ollama-client.ts b/src/resources/extensions/ollama/ollama-client.ts new file mode 100644 index 000000000..d881fd013 --- /dev/null +++ b/src/resources/extensions/ollama/ollama-client.ts @@ -0,0 +1,196 @@ +// GSD2 — HTTP client for Ollama REST API + +/** + * Low-level HTTP client for the Ollama REST API. + * Respects the OLLAMA_HOST environment variable for non-default endpoints. + * + * Reference: https://github.com/ollama/ollama/blob/main/docs/api.md + */ + +import type { + OllamaPsResponse, + OllamaPullProgress, + OllamaShowResponse, + OllamaTagsResponse, + OllamaVersionResponse, +} from "./types.js"; + +const DEFAULT_HOST = "http://localhost:11434"; +const PROBE_TIMEOUT_MS = 1500; +const REQUEST_TIMEOUT_MS = 10000; + +/** + * Get the Ollama host URL from OLLAMA_HOST or default. + */ +export function getOllamaHost(): string { + const host = process.env.OLLAMA_HOST; + if (!host) return DEFAULT_HOST; + + // OLLAMA_HOST can be just a host:port without scheme + if (host.startsWith("http://") || host.startsWith("https://")) return host; + return `http://${host}`; +} + +async function fetchWithTimeout(url: string, options: RequestInit = {}, timeoutMs = REQUEST_TIMEOUT_MS): Promise { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), timeoutMs); + try { + return await fetch(url, { ...options, signal: controller.signal }); + } finally { + clearTimeout(timeout); + } +} + +/** + * Check if Ollama is running and reachable. + */ +export async function isRunning(): Promise { + try { + const response = await fetchWithTimeout(`${getOllamaHost()}/`, {}, PROBE_TIMEOUT_MS); + return response.ok; + } catch { + return false; + } +} + +/** + * Get Ollama version. + */ +export async function getVersion(): Promise { + try { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/version`); + if (!response.ok) return null; + const data = (await response.json()) as OllamaVersionResponse; + return data.version; + } catch { + return null; + } +} + +/** + * List all locally available models. 
+ */ +export async function listModels(): Promise { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/tags`); + if (!response.ok) { + throw new Error(`Ollama /api/tags returned ${response.status}: ${response.statusText}`); + } + return (await response.json()) as OllamaTagsResponse; +} + +/** + * Get detailed information about a specific model. + */ +export async function showModel(name: string): Promise { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/show`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name }), + }); + if (!response.ok) { + throw new Error(`Ollama /api/show returned ${response.status}: ${response.statusText}`); + } + return (await response.json()) as OllamaShowResponse; +} + +/** + * List currently loaded/running models. + */ +export async function getRunningModels(): Promise { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/ps`); + if (!response.ok) { + throw new Error(`Ollama /api/ps returned ${response.status}: ${response.statusText}`); + } + return (await response.json()) as OllamaPsResponse; +} + +/** + * Pull a model with streaming progress. + * Calls onProgress for each progress update. + * Returns when the pull is complete. + */ +export async function pullModel( + name: string, + onProgress?: (progress: OllamaPullProgress) => void, + signal?: AbortSignal, +): Promise { + const response = await fetch(`${getOllamaHost()}/api/pull`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name, stream: true }), + signal, + }); + + if (!response.ok) { + const text = await response.text(); + throw new Error(`Ollama /api/pull returned ${response.status}: ${text}`); + } + + if (!response.body) { + throw new Error("Ollama /api/pull returned no body"); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + buffer = lines.pop() ?? ""; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) continue; + try { + const progress = JSON.parse(trimmed) as OllamaPullProgress; + onProgress?.(progress); + } catch { + // Skip malformed lines + } + } + } + + // Process remaining buffer + if (buffer.trim()) { + try { + const progress = JSON.parse(buffer.trim()) as OllamaPullProgress; + onProgress?.(progress); + } catch { + // Ignore + } + } +} + +/** + * Delete a local model. + */ +export async function deleteModel(name: string): Promise { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/delete`, { + method: "DELETE", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name }), + }); + if (!response.ok) { + const text = await response.text(); + throw new Error(`Ollama /api/delete returned ${response.status}: ${text}`); + } +} + +/** + * Copy a model to a new name. 
+ */ +export async function copyModel(source: string, destination: string): Promise { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/copy`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ source, destination }), + }); + if (!response.ok) { + const text = await response.text(); + throw new Error(`Ollama /api/copy returned ${response.status}: ${text}`); + } +} diff --git a/src/resources/extensions/ollama/ollama-commands.ts b/src/resources/extensions/ollama/ollama-commands.ts new file mode 100644 index 000000000..81322c784 --- /dev/null +++ b/src/resources/extensions/ollama/ollama-commands.ts @@ -0,0 +1,248 @@ +// GSD2 — Ollama slash commands + +/** + * Registers /ollama slash commands for managing local Ollama models. + * + * Commands: + * /ollama — Show status (running?, version, loaded models) + * /ollama list — List all available local models with sizes + * /ollama pull — Pull a model with progress + * /ollama remove — Delete a local model + * /ollama ps — Show running models and resource usage + */ + +import type { ExtensionAPI } from "@gsd/pi-coding-agent"; +import { Text } from "@gsd/pi-tui"; +import * as client from "./ollama-client.js"; +import { discoverModels, formatModelForDisplay } from "./ollama-discovery.js"; +import { formatModelSize } from "./model-capabilities.js"; + +export function registerOllamaCommands(pi: ExtensionAPI): void { + pi.registerCommand("ollama", { + description: "Manage local Ollama models — list | pull | remove | ps", + async handler(args, ctx) { + const parts = (args ?? "").trim().split(/\s+/); + const subcommand = parts[0] || "status"; + const modelArg = parts.slice(1).join(" "); + + switch (subcommand) { + case "status": + return await handleStatus(ctx); + case "list": + case "ls": + return await handleList(ctx); + case "pull": + return await handlePull(modelArg, ctx); + case "remove": + case "rm": + case "delete": + return await handleRemove(modelArg, ctx); + case "ps": + return await handlePs(ctx); + default: + ctx.ui.notify( + `Unknown subcommand: ${subcommand}. Use: status, list, pull, remove, ps`, + "warning", + ); + } + }, + }); +} + +async function handleStatus(ctx: any): Promise { + const running = await client.isRunning(); + if (!running) { + ctx.ui.notify( + "Ollama is not running. Install from https://ollama.com and run 'ollama serve'", + "warning", + ); + return; + } + + const version = await client.getVersion(); + const lines: string[] = []; + lines.push(`Ollama${version ? ` v${version}` : ""} — running (${client.getOllamaHost()})`); + + // Show loaded models + try { + const ps = await client.getRunningModels(); + if (ps.models && ps.models.length > 0) { + lines.push(""); + lines.push("Loaded:"); + for (const m of ps.models) { + const vram = m.size_vram > 0 ? formatModelSize(m.size_vram) + " VRAM" : "CPU"; + const expiresAt = new Date(m.expires_at); + const idleMs = expiresAt.getTime() - Date.now(); + const idleMin = Math.max(0, Math.floor(idleMs / 60000)); + lines.push(` ${m.name} ${vram} expires in ${idleMin}m`); + } + } + } catch { + // ps endpoint may not be available on older versions + } + + // Show available models + try { + const models = await discoverModels(); + if (models.length > 0) { + lines.push(""); + lines.push("Available:"); + for (const m of models) { + lines.push(` ${formatModelForDisplay(m)}`); + } + } else { + lines.push(""); + lines.push("No models pulled. 
+    }
+  } catch (err) {
+    lines.push("");
+    lines.push(`Error listing models: ${err instanceof Error ? err.message : String(err)}`);
+  }
+
+  await ctx.ui.custom(
+    (tui: any, theme: any, _kb: any, done: (r: undefined) => void) => {
+      const text = new Text(lines.map((l) => theme.fg("fg", l)).join("\n"), 0, 0);
+      setTimeout(() => done(undefined), 0);
+      return text;
+    },
+  );
+}
+
+async function handleList(ctx: any): Promise<void> {
+  const running = await client.isRunning();
+  if (!running) {
+    ctx.ui.notify("Ollama is not running", "warning");
+    return;
+  }
+
+  const models = await discoverModels();
+  if (models.length === 0) {
+    ctx.ui.notify("No models available. Use /ollama pull <model> to download one.", "info");
+    return;
+  }
+
+  const lines = ["Local Ollama models:", ""];
+  for (const m of models) {
+    lines.push(`  ${formatModelForDisplay(m)}`);
+  }
+
+  await ctx.ui.custom(
+    (tui: any, theme: any, _kb: any, done: (r: undefined) => void) => {
+      const text = new Text(lines.map((l) => theme.fg("fg", l)).join("\n"), 0, 0);
+      setTimeout(() => done(undefined), 0);
+      return text;
+    },
+  );
+}
+
+async function handlePull(modelName: string, ctx: any): Promise<void> {
+  if (!modelName) {
+    ctx.ui.notify("Usage: /ollama pull <model> (e.g. /ollama pull llama3.1:8b)", "warning");
+    return;
+  }
+
+  const running = await client.isRunning();
+  if (!running) {
+    ctx.ui.notify("Ollama is not running", "warning");
+    return;
+  }
+
+  ctx.ui.setWidget("ollama-pull", [`Pulling ${modelName}...`]);
+
+  try {
+    let lastPercent = -1;
+    await client.pullModel(modelName, (progress) => {
+      if (progress.total && progress.completed) {
+        const percent = Math.floor((progress.completed / progress.total) * 100);
+        if (percent !== lastPercent) {
+          lastPercent = percent;
+          const completed = formatModelSize(progress.completed);
+          const total = formatModelSize(progress.total);
+          ctx.ui.setWidget("ollama-pull", [
+            `Pulling ${modelName}... ${percent}% (${completed} / ${total})`,
+          ]);
+        }
+      } else if (progress.status) {
+        ctx.ui.setWidget("ollama-pull", [`${modelName}: ${progress.status}`]);
+      }
+    });
+
+    ctx.ui.setWidget("ollama-pull", undefined);
+    ctx.ui.notify(`${modelName} pulled successfully`, "success");
+  } catch (err) {
+    ctx.ui.setWidget("ollama-pull", undefined);
+    ctx.ui.notify(
+      `Failed to pull ${modelName}: ${err instanceof Error ? err.message : String(err)}`,
+      "error",
+    );
+  }
+}
+
+async function handleRemove(modelName: string, ctx: any): Promise<void> {
+  if (!modelName) {
+    ctx.ui.notify("Usage: /ollama remove <model>", "warning");
+    return;
+  }
+
+  const running = await client.isRunning();
+  if (!running) {
+    ctx.ui.notify("Ollama is not running", "warning");
+    return;
+  }
+
+  const confirmed = await ctx.ui.confirm(
+    "Delete model",
+    `Are you sure you want to delete ${modelName}?`,
+  );
+
+  if (!confirmed) return;
+
+  try {
+    await client.deleteModel(modelName);
+    ctx.ui.notify(`${modelName} deleted`, "success");
+  } catch (err) {
+    ctx.ui.notify(
+      `Failed to delete ${modelName}: ${err instanceof Error ? err.message : String(err)}`,
+      "error",
+    );
+  }
+}
+
+async function handlePs(ctx: any): Promise<void> {
+  const running = await client.isRunning();
+  if (!running) {
+    ctx.ui.notify("Ollama is not running", "warning");
+    return;
+  }
+
+  try {
+    const ps = await client.getRunningModels();
+    if (!ps.models || ps.models.length === 0) {
+      ctx.ui.notify("No models currently loaded in memory", "info");
+      return;
+    }
+
+    const lines = ["Running models:", ""];
+    for (const m of ps.models) {
+      const vram = m.size_vram > 0 ? formatModelSize(m.size_vram) + " VRAM" : "CPU only";
+      const totalSize = formatModelSize(m.size);
+      const expiresAt = new Date(m.expires_at);
+      const idleMs = expiresAt.getTime() - Date.now();
+      const idleMin = Math.max(0, Math.floor(idleMs / 60000));
+      lines.push(`  ${m.name}  ${totalSize}  ${vram}  expires in ${idleMin}m`);
+    }
+
+    await ctx.ui.custom(
+      (tui: any, theme: any, _kb: any, done: (r: undefined) => void) => {
+        const text = new Text(lines.map((l) => theme.fg("fg", l)).join("\n"), 0, 0);
+        setTimeout(() => done(undefined), 0);
+        return text;
+      },
+    );
+  } catch (err) {
+    ctx.ui.notify(
+      `Failed to get running models: ${err instanceof Error ? err.message : String(err)}`,
+      "error",
+    );
+  }
+}
diff --git a/src/resources/extensions/ollama/ollama-discovery.ts b/src/resources/extensions/ollama/ollama-discovery.ts
new file mode 100644
index 000000000..eb6916b9e
--- /dev/null
+++ b/src/resources/extensions/ollama/ollama-discovery.ts
@@ -0,0 +1,106 @@
+// GSD2 — Ollama model discovery and capability detection
+
+/**
+ * Discovers locally available Ollama models and enriches them with
+ * capability metadata (context window, vision, reasoning) from the
+ * known model table and /api/show responses.
+ *
+ * Returns models in the format expected by pi.registerProvider().
+ */
+
+import { listModels, getOllamaHost } from "./ollama-client.js";
+import {
+  estimateContextFromParams,
+  formatModelSize,
+  getModelCapabilities,
+  humanizeModelName,
+} from "./model-capabilities.js";
+import type { OllamaModelInfo } from "./types.js";
+
+export interface DiscoveredOllamaModel {
+  id: string;
+  name: string;
+  reasoning: boolean;
+  input: ("text" | "image")[];
+  cost: { input: number; output: number; cacheRead: number; cacheWrite: number };
+  contextWindow: number;
+  maxTokens: number;
+  /** Raw size in bytes for display purposes */
+  sizeBytes: number;
+  /** Parameter size string from Ollama (e.g. "7B") */
+  parameterSize: string;
+}
+
+const ZERO_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
+
+function enrichModel(info: OllamaModelInfo): DiscoveredOllamaModel {
+  const caps = getModelCapabilities(info.name);
+  const parameterSize = info.details?.parameter_size ?? "";
+
+  // Determine context window: known table > estimate from param size > default
+  const contextWindow =
+    caps.contextWindow ??
+    (parameterSize ? estimateContextFromParams(parameterSize) : 8192);
+
+  // Determine max tokens: known table > fraction of context > default
+  const maxTokens =
+    caps.maxTokens ?? Math.min(Math.floor(contextWindow / 4), 16384);
+
+  // Detect vision from families or known table
+  const hasVision =
+    caps.input?.includes("image") ??
+    (info.details?.families?.some((f) => f === "clip" || f === "mllama") ?? false);
+
+  // Detect reasoning from known table
+  const reasoning = caps.reasoning ?? false;
+
+  return {
+    id: info.name,
+    name: humanizeModelName(info.name),
+    reasoning,
["text", "image"] : ["text"], + cost: ZERO_COST, + contextWindow, + maxTokens, + sizeBytes: info.size, + parameterSize, + }; +} + +/** + * Discover all locally available Ollama models with enriched capabilities. + */ +export async function discoverModels(): Promise { + const tags = await listModels(); + if (!tags.models || tags.models.length === 0) return []; + + return tags.models.map(enrichModel); +} + +/** + * Format a discovered model for display in model list. + */ +export function formatModelForDisplay(model: DiscoveredOllamaModel): string { + const parts = [model.id]; + + if (model.sizeBytes > 0) { + parts.push(`(${formatModelSize(model.sizeBytes)})`); + } + + const flags: string[] = []; + if (model.reasoning) flags.push("reasoning"); + if (model.input.includes("image")) flags.push("vision"); + + if (flags.length > 0) { + parts.push(`[${flags.join(", ")}]`); + } + + return parts.join(" "); +} + +/** + * Build the OpenAI-compat base URL for Ollama. + */ +export function getOllamaOpenAIBaseUrl(): string { + return `${getOllamaHost()}/v1`; +} diff --git a/src/resources/extensions/ollama/ollama-tool.ts b/src/resources/extensions/ollama/ollama-tool.ts new file mode 100644 index 000000000..ad5af5885 --- /dev/null +++ b/src/resources/extensions/ollama/ollama-tool.ts @@ -0,0 +1,218 @@ +// GSD2 — LLM-callable Ollama management tool +/** + * Registers an ollama_manage tool that the LLM can call to interact + * with the local Ollama instance — list models, pull new ones, check status. + */ + +import type { ExtensionAPI } from "@gsd/pi-coding-agent"; +import { Text } from "@gsd/pi-tui"; +import { Type } from "@sinclair/typebox"; +import * as client from "./ollama-client.js"; +import { discoverModels, formatModelForDisplay } from "./ollama-discovery.js"; +import { formatModelSize } from "./model-capabilities.js"; + +interface OllamaToolDetails { + action: string; + model?: string; + modelCount?: number; + durationMs: number; + error?: string; +} + +export function registerOllamaTool(pi: ExtensionAPI): void { + pi.registerTool({ + name: "ollama_manage", + label: "Ollama", + description: + "Manage local Ollama models. List available models, pull new ones, " + + "check Ollama status, or see running models and resource usage. " + + "Use this when you need a specific local model that isn't available yet.", + promptSnippet: "Manage local Ollama models (list, pull, status, ps)", + promptGuidelines: [ + "Use 'list' to see what models are available locally before trying to use one.", + "Use 'pull' to download a model that isn't available yet.", + "Use 'status' to check if Ollama is running.", + "Use 'ps' to see which models are loaded in memory and VRAM usage.", + "Common models: llama3.1:8b, qwen2.5-coder:7b, deepseek-r1:8b, codestral:22b", + ], + parameters: Type.Object({ + action: Type.Union( + [ + Type.Literal("list"), + Type.Literal("pull"), + Type.Literal("status"), + Type.Literal("ps"), + ], + { description: "Action to perform" }, + ), + model: Type.Optional( + Type.String({ description: "Model name (required for pull)" }), + ), + }), + + async execute(_toolCallId, params, signal, onUpdate, _ctx) { + const startTime = Date.now(); + const { action, model } = params; + + try { + switch (action) { + case "status": { + const running = await client.isRunning(); + if (!running) { + return { + content: [{ type: "text", text: "Ollama is not running. It needs to be started with 'ollama serve'." 
}], + details: { action, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + const version = await client.getVersion(); + return { + content: [{ type: "text", text: `Ollama${version ? ` v${version}` : ""} is running at ${client.getOllamaHost()}` }], + details: { action, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + case "list": { + const running = await client.isRunning(); + if (!running) { + return { + content: [{ type: "text", text: "Ollama is not running." }], + isError: true, + details: { action, durationMs: Date.now() - startTime, error: "not_running" } as OllamaToolDetails, + }; + } + + const models = await discoverModels(); + if (models.length === 0) { + return { + content: [{ type: "text", text: "No models available. Pull one with action='pull'." }], + details: { action, modelCount: 0, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + const lines = models.map((m) => formatModelForDisplay(m)); + return { + content: [{ type: "text", text: `Available models:\n${lines.join("\n")}` }], + details: { action, modelCount: models.length, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + case "pull": { + if (!model) { + return { + content: [{ type: "text", text: "Error: 'model' parameter is required for pull action." }], + isError: true, + details: { action, durationMs: Date.now() - startTime, error: "missing_model" } as OllamaToolDetails, + }; + } + + const running = await client.isRunning(); + if (!running) { + return { + content: [{ type: "text", text: "Ollama is not running." }], + isError: true, + details: { action, model, durationMs: Date.now() - startTime, error: "not_running" } as OllamaToolDetails, + }; + } + + let lastStatus = ""; + await client.pullModel(model, (progress) => { + if (progress.total && progress.completed) { + const pct = Math.floor((progress.completed / progress.total) * 100); + const status = `Pulling ${model}... ${pct}%`; + if (status !== lastStatus) { + lastStatus = status; + onUpdate?.({ content: [{ type: "text", text: status }], details: { action, model, durationMs: Date.now() - startTime } as OllamaToolDetails }); + } + } else if (progress.status && progress.status !== lastStatus) { + lastStatus = progress.status; + onUpdate?.({ content: [{ type: "text", text: `${model}: ${progress.status}` }], details: { action, model, durationMs: Date.now() - startTime } as OllamaToolDetails }); + } + }, signal); + + return { + content: [{ type: "text", text: `Successfully pulled ${model}` }], + details: { action, model, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + case "ps": { + const running = await client.isRunning(); + if (!running) { + return { + content: [{ type: "text", text: "Ollama is not running." }], + isError: true, + details: { action, durationMs: Date.now() - startTime, error: "not_running" } as OllamaToolDetails, + }; + } + + const ps = await client.getRunningModels(); + if (!ps.models || ps.models.length === 0) { + return { + content: [{ type: "text", text: "No models currently loaded in memory." }], + details: { action, modelCount: 0, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + const lines = ps.models.map((m) => { + const vram = m.size_vram > 0 ? 
`${formatModelSize(m.size_vram)} VRAM` : "CPU"; + return `${m.name} — ${formatModelSize(m.size)} total, ${vram}`; + }); + + return { + content: [{ type: "text", text: `Loaded models:\n${lines.join("\n")}` }], + details: { action, modelCount: ps.models.length, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + default: + return { + content: [{ type: "text", text: `Unknown action: ${action}` }], + isError: true, + details: { action, durationMs: Date.now() - startTime, error: "unknown_action" } as OllamaToolDetails, + }; + } + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + return { + content: [{ type: "text", text: `Ollama error: ${msg}` }], + isError: true, + details: { action, model, durationMs: Date.now() - startTime, error: msg } as OllamaToolDetails, + }; + } + }, + + renderCall(args, theme) { + let text = theme.fg("toolTitle", theme.bold("ollama ")); + text += theme.fg("accent", args.action); + if (args.model) { + text += theme.fg("dim", ` ${args.model}`); + } + return new Text(text, 0, 0); + }, + + renderResult(result, { isPartial, expanded }, theme) { + const d = result.details as OllamaToolDetails | undefined; + + if (isPartial) return new Text(theme.fg("warning", "Working..."), 0, 0); + if ((result as any).isError || d?.error) { + return new Text(theme.fg("error", `Error: ${d?.error ?? "unknown"}`), 0, 0); + } + + let text = theme.fg("success", d?.action ?? "done"); + if (d?.modelCount !== undefined) { + text += theme.fg("dim", ` (${d.modelCount} models)`); + } + text += theme.fg("dim", ` ${d?.durationMs ?? 0}ms`); + + if (expanded) { + const content = result.content[0]; + if (content?.type === "text") { + const preview = content.text.split("\n").slice(0, 10).join("\n"); + text += "\n\n" + theme.fg("dim", preview); + } + } + + return new Text(text, 0, 0); + }, + }); +} diff --git a/src/resources/extensions/ollama/tests/model-capabilities.test.ts b/src/resources/extensions/ollama/tests/model-capabilities.test.ts new file mode 100644 index 000000000..61af68e9b --- /dev/null +++ b/src/resources/extensions/ollama/tests/model-capabilities.test.ts @@ -0,0 +1,162 @@ +// GSD2 — Tests for Ollama model capability detection +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { + getModelCapabilities, + estimateContextFromParams, + humanizeModelName, + formatModelSize, +} from "../model-capabilities.js"; + +// ─── getModelCapabilities ──────────────────────────────────────────────────── + +describe("getModelCapabilities", () => { + it("returns reasoning for deepseek-r1 models", () => { + const caps = getModelCapabilities("deepseek-r1:8b"); + assert.equal(caps.reasoning, true); + assert.equal(caps.contextWindow, 131072); + }); + + it("returns reasoning for qwq models", () => { + const caps = getModelCapabilities("qwq:32b"); + assert.equal(caps.reasoning, true); + }); + + it("returns vision for llava models", () => { + const caps = getModelCapabilities("llava:7b"); + assert.deepEqual(caps.input, ["text", "image"]); + }); + + it("returns vision for llama3.2-vision models", () => { + const caps = getModelCapabilities("llama3.2-vision:11b"); + assert.deepEqual(caps.input, ["text", "image"]); + }); + + it("returns correct context for llama3.1", () => { + const caps = getModelCapabilities("llama3.1:8b"); + assert.equal(caps.contextWindow, 131072); + }); + + it("returns correct context for llama3 (no .1)", () => { + const caps = getModelCapabilities("llama3:8b"); + assert.equal(caps.contextWindow, 8192); + 
}); + + it("returns correct context for llama2", () => { + const caps = getModelCapabilities("llama2:7b"); + assert.equal(caps.contextWindow, 4096); + }); + + it("returns correct context for qwen2.5-coder", () => { + const caps = getModelCapabilities("qwen2.5-coder:7b"); + assert.equal(caps.contextWindow, 131072); + assert.equal(caps.maxTokens, 32768); + }); + + it("returns correct context for codestral", () => { + const caps = getModelCapabilities("codestral:22b"); + assert.equal(caps.contextWindow, 262144); + }); + + it("returns correct context for mistral-nemo", () => { + const caps = getModelCapabilities("mistral-nemo:12b"); + assert.equal(caps.contextWindow, 131072); + }); + + it("returns correct context for gemma3", () => { + const caps = getModelCapabilities("gemma3:9b"); + assert.equal(caps.contextWindow, 131072); + }); + + it("returns empty object for unknown models", () => { + const caps = getModelCapabilities("totally-unknown-model:3b"); + assert.deepEqual(caps, {}); + }); + + it("strips tag before matching", () => { + const caps = getModelCapabilities("llama3.1:70b-instruct-q4_0"); + assert.equal(caps.contextWindow, 131072); + }); + + it("matches case-insensitively", () => { + const caps = getModelCapabilities("Llama3.1:8B"); + assert.equal(caps.contextWindow, 131072); + }); +}); + +// ─── estimateContextFromParams ─────────────────────────────────────────────── + +describe("estimateContextFromParams", () => { + it("estimates 8192 for small models", () => { + assert.equal(estimateContextFromParams("1.5B"), 8192); + }); + + it("estimates 16384 for 7B models", () => { + assert.equal(estimateContextFromParams("7B"), 16384); + }); + + it("estimates 32768 for 13B models", () => { + assert.equal(estimateContextFromParams("13B"), 32768); + }); + + it("estimates 65536 for 34B models", () => { + assert.equal(estimateContextFromParams("34B"), 65536); + }); + + it("estimates 131072 for 70B+ models", () => { + assert.equal(estimateContextFromParams("70B"), 131072); + }); + + it("handles decimal sizes", () => { + assert.equal(estimateContextFromParams("7.5B"), 16384); + }); + + it("handles M (millions)", () => { + assert.equal(estimateContextFromParams("500M"), 8192); + }); + + it("returns 8192 for unparseable input", () => { + assert.equal(estimateContextFromParams("unknown"), 8192); + }); + + it("returns 8192 for empty string", () => { + assert.equal(estimateContextFromParams(""), 8192); + }); +}); + +// ─── humanizeModelName ─────────────────────────────────────────────────────── + +describe("humanizeModelName", () => { + it("capitalizes and adds tag", () => { + assert.equal(humanizeModelName("llama3.1:8b"), "Llama 3.1 8B"); + }); + + it("handles latest tag", () => { + assert.equal(humanizeModelName("llama3.1:latest"), "Llama 3.1"); + }); + + it("handles no tag", () => { + assert.equal(humanizeModelName("llama3.1"), "Llama 3.1"); + }); + + it("handles hyphenated names", () => { + const result = humanizeModelName("deepseek-r1:8b"); + assert.ok(result.includes("8B")); + }); +}); + +// ─── formatModelSize ───────────────────────────────────────────────────────── + +describe("formatModelSize", () => { + it("formats GB", () => { + assert.equal(formatModelSize(4_700_000_000), "4.7 GB"); + }); + + it("formats MB", () => { + assert.equal(formatModelSize(500_000_000), "500.0 MB"); + }); + + it("formats KB", () => { + assert.equal(formatModelSize(500_000), "500 KB"); + }); +}); diff --git a/src/resources/extensions/ollama/tests/ollama-client.test.ts 
new file mode 100644
index 000000000..0deae397a
--- /dev/null
+++ b/src/resources/extensions/ollama/tests/ollama-client.test.ts
@@ -0,0 +1,38 @@
+// GSD2 — Tests for Ollama HTTP client
+import { describe, it, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import { getOllamaHost } from "../ollama-client.js";
+
+// ─── getOllamaHost ──────────────────────────────────────────────────────────
+
+describe("getOllamaHost", () => {
+  const originalHost = process.env.OLLAMA_HOST;
+
+  afterEach(() => {
+    if (originalHost === undefined) {
+      delete process.env.OLLAMA_HOST;
+    } else {
+      process.env.OLLAMA_HOST = originalHost;
+    }
+  });
+
+  it("returns default when OLLAMA_HOST is not set", () => {
+    delete process.env.OLLAMA_HOST;
+    assert.equal(getOllamaHost(), "http://localhost:11434");
+  });
+
+  it("returns OLLAMA_HOST when set with scheme", () => {
+    process.env.OLLAMA_HOST = "http://myhost:12345";
+    assert.equal(getOllamaHost(), "http://myhost:12345");
+  });
+
+  it("adds http:// when OLLAMA_HOST has no scheme", () => {
+    process.env.OLLAMA_HOST = "myhost:12345";
+    assert.equal(getOllamaHost(), "http://myhost:12345");
+  });
+
+  it("preserves https:// scheme", () => {
+    process.env.OLLAMA_HOST = "https://secure-ollama.example.com";
+    assert.equal(getOllamaHost(), "https://secure-ollama.example.com");
+  });
+});
diff --git a/src/resources/extensions/ollama/tests/ollama-discovery.test.ts b/src/resources/extensions/ollama/tests/ollama-discovery.test.ts
new file mode 100644
index 000000000..b69cf84e1
--- /dev/null
+++ b/src/resources/extensions/ollama/tests/ollama-discovery.test.ts
@@ -0,0 +1,28 @@
+// GSD2 — Tests for Ollama model discovery and enrichment
+import { describe, it, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import { getOllamaOpenAIBaseUrl } from "../ollama-discovery.js";
+
+// ─── getOllamaOpenAIBaseUrl ─────────────────────────────────────────────────
+
+describe("getOllamaOpenAIBaseUrl", () => {
+  const originalHost = process.env.OLLAMA_HOST;
+
+  afterEach(() => {
+    if (originalHost === undefined) {
+      delete process.env.OLLAMA_HOST;
+    } else {
+      process.env.OLLAMA_HOST = originalHost;
+    }
+  });
+
+  it("returns default OpenAI-compat URL", () => {
+    delete process.env.OLLAMA_HOST;
+    assert.equal(getOllamaOpenAIBaseUrl(), "http://localhost:11434/v1");
+  });
+
+  it("appends /v1 to custom OLLAMA_HOST", () => {
+    process.env.OLLAMA_HOST = "http://remote:9999";
+    assert.equal(getOllamaOpenAIBaseUrl(), "http://remote:9999/v1");
+  });
+});
diff --git a/src/resources/extensions/ollama/types.ts b/src/resources/extensions/ollama/types.ts
new file mode 100644
index 000000000..5f2c88705
--- /dev/null
+++ b/src/resources/extensions/ollama/types.ts
@@ -0,0 +1,130 @@
+// GSD2 — Ollama API response types
+
+/**
+ * Type definitions for the Ollama REST API.
+ * Reference: https://github.com/ollama/ollama/blob/main/docs/api.md
+ */
+
+// ─── /api/tags ──────────────────────────────────────────────────────────────
+
+export interface OllamaModelDetails {
+  parent_model: string;
+  format: string;
+  family: string;
+  families: string[] | null;
+  parameter_size: string;
+  quantization_level: string;
+}
+
+export interface OllamaModelInfo {
+  name: string;
+  model: string;
+  modified_at: string;
+  size: number;
+  digest: string;
+  details: OllamaModelDetails;
+}
+
+export interface OllamaTagsResponse {
+  models: OllamaModelInfo[];
+}
+
+// ─── /api/show ──────────────────────────────────────────────────────────────
+
+export interface OllamaShowResponse {
+  modelfile: string;
+  parameters: string;
+  template: string;
+  details: OllamaModelDetails;
+  model_info: Record<string, unknown>;
+}
+
+// ─── /api/ps ────────────────────────────────────────────────────────────────
+
+export interface OllamaRunningModel {
+  name: string;
+  model: string;
+  size: number;
+  digest: string;
+  details: OllamaModelDetails;
+  expires_at: string;
+  size_vram: number;
+}
+
+export interface OllamaPsResponse {
+  models: OllamaRunningModel[];
+}
+
+// ─── /api/pull ──────────────────────────────────────────────────────────────
+
+export interface OllamaPullProgress {
+  status: string;
+  digest?: string;
+  total?: number;
+  completed?: number;
+}
+
+// ─── /api/version ───────────────────────────────────────────────────────────
+
+export interface OllamaVersionResponse {
+  version: string;
+}
+
+// ─── /api/chat ──────────────────────────────────────────────────────────────
+
+export interface OllamaChatMessage {
+  role: "system" | "user" | "assistant" | "tool";
+  content: string;
+  images?: string[];
+  tool_calls?: OllamaToolCall[];
+}
+
+export interface OllamaToolCall {
+  function: {
+    name: string;
+    arguments: Record<string, unknown>;
+  };
+}
+
+export interface OllamaTool {
+  type: "function";
+  function: {
+    name: string;
+    description: string;
+    parameters: {
+      type: "object";
+      required?: string[];
+      properties: Record<string, unknown>;
+    };
+  };
+}
+
+export interface OllamaChatRequest {
+  model: string;
+  messages: OllamaChatMessage[];
+  stream?: boolean;
+  tools?: OllamaTool[];
+  options?: {
+    num_ctx?: number;
+    num_predict?: number;
+    temperature?: number;
+    top_p?: number;
+    top_k?: number;
+    stop?: string[];
+  };
+  keep_alive?: string;
+}
+
+export interface OllamaChatResponse {
+  model: string;
+  created_at: string;
+  message: OllamaChatMessage;
+  done: boolean;
+  done_reason?: string;
+  total_duration?: number;
+  load_duration?: number;
+  prompt_eval_count?: number;
+  prompt_eval_duration?: number;
+  eval_count?: number;
+  eval_duration?: number;
+}
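
Not part of the diff above: the sketch below shows how the `/api/chat` types in types.ts are meant to be consumed. Ollama streams newline-delimited JSON from `/api/chat`, one `OllamaChatResponse` object per line, with `done: true` on the final chunk. The sketch assumes only the global `fetch` of Node 18+; the `streamChat` helper name is illustrative, and the extension's actual streaming provider (`ollama-provider.ts`, not shown in this hunk) may be structured differently.

```ts
import type { OllamaChatRequest, OllamaChatResponse } from "./types.js";

// Illustrative only: yield each /api/chat NDJSON chunk as a parsed
// OllamaChatResponse. The final chunk has done === true and carries
// the eval counts and durations.
async function* streamChat(
  host: string,
  req: OllamaChatRequest,
): AsyncGenerator<OllamaChatResponse> {
  const res = await fetch(`${host}/api/chat`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ ...req, stream: true }),
  });
  if (!res.ok || !res.body) {
    throw new Error(`Ollama /api/chat failed: HTTP ${res.status}`);
  }

  const decoder = new TextDecoder();
  let buffer = "";
  for await (const chunk of res.body as AsyncIterable<Uint8Array>) {
    buffer += decoder.decode(chunk, { stream: true });
    // A network chunk can end mid-object, so parse per line, not per chunk.
    let newline: number;
    while ((newline = buffer.indexOf("\n")) !== -1) {
      const line = buffer.slice(0, newline).trim();
      buffer = buffer.slice(newline + 1);
      if (line) yield JSON.parse(line) as OllamaChatResponse;
    }
  }
}
```

Buffering until each newline is the important detail: naive per-chunk `JSON.parse` breaks as soon as a response object straddles two network reads.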