Add Ollama Cloud (ollama.com) as a built-in provider with both model hosting and web search/fetch capabilities. Model provider: - 13 curated models via OpenAI-compatible API (Llama 3.1, Qwen 3, DeepSeek R1, Gemma 3, Mistral, Phi-4, GPT-OSS) - Auth via OLLAMA_API_KEY environment variable - Registered in onboarding, env hydration, and model resolver Web tool provider: - Search via POST ollama.com/api/web_search - Page fetch via POST ollama.com/api/web_fetch (fallback after Jina) - Added as third search provider option alongside Tavily and Brave - /search-provider command updated with ollama option Closes #430
This commit is contained in:
parent
3fed189e00
commit
ecf8125e39
14 changed files with 468 additions and 21 deletions
|
|
@ -123,6 +123,7 @@ export function getEnvApiKey(provider: any): string | undefined {
|
|||
"opencode-go": "OPENCODE_API_KEY",
|
||||
"kimi-coding": "KIMI_API_KEY",
|
||||
"alibaba-coding-plan": "ALIBABA_API_KEY",
|
||||
"ollama-cloud": "OLLAMA_API_KEY",
|
||||
"custom-openai": "CUSTOM_OPENAI_API_KEY",
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -13522,4 +13522,240 @@ export const MODELS = {
|
|||
maxTokens: 32768,
|
||||
} satisfies Model<"anthropic-messages">,
|
||||
},
|
||||
"ollama-cloud": {
|
||||
"llama3.1:8b": {
|
||||
id: "llama3.1:8b",
|
||||
name: "Llama 3.1 8B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"llama3.1:70b": {
|
||||
id: "llama3.1:70b",
|
||||
name: "Llama 3.1 70B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"llama3.1:405b": {
|
||||
id: "llama3.1:405b",
|
||||
name: "Llama 3.1 405B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"qwen3:8b": {
|
||||
id: "qwen3:8b",
|
||||
name: "Qwen 3 8B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"qwen3:32b": {
|
||||
id: "qwen3:32b",
|
||||
name: "Qwen 3 32B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"deepseek-r1:8b": {
|
||||
id: "deepseek-r1:8b",
|
||||
name: "DeepSeek R1 8B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"deepseek-r1:70b": {
|
||||
id: "deepseek-r1:70b",
|
||||
name: "DeepSeek R1 70B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"gemma3:12b": {
|
||||
id: "gemma3:12b",
|
||||
name: "Gemma 3 12B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"gemma3:27b": {
|
||||
id: "gemma3:27b",
|
||||
name: "Gemma 3 27B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"mistral:7b": {
|
||||
id: "mistral:7b",
|
||||
name: "Mistral 7B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"phi4:14b": {
|
||||
id: "phi4:14b",
|
||||
name: "Phi-4 14B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"gpt-oss:20b": {
|
||||
id: "gpt-oss:20b",
|
||||
name: "GPT-OSS 20B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"gpt-oss:120b": {
|
||||
id: "gpt-oss:120b",
|
||||
name: "GPT-OSS 120B",
|
||||
api: "openai-completions",
|
||||
provider: "ollama-cloud",
|
||||
baseUrl: "https://ollama.com/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
},
|
||||
} as const;
|
||||
|
|
|
|||
|
|
@ -40,7 +40,8 @@ export type KnownProvider =
|
|||
| "opencode"
|
||||
| "opencode-go"
|
||||
| "kimi-coding"
|
||||
| "alibaba-coding-plan";
|
||||
| "alibaba-coding-plan"
|
||||
| "ollama-cloud";
|
||||
export type Provider = KnownProvider | string;
|
||||
|
||||
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
|
||||
|
|
|
|||
|
|
@ -290,6 +290,7 @@ ${chalk.bold("Environment Variables:")}
|
|||
AI_GATEWAY_API_KEY - Vercel AI Gateway API key
|
||||
ZAI_API_KEY - ZAI API key
|
||||
MISTRAL_API_KEY - Mistral API key
|
||||
OLLAMA_API_KEY - Ollama Cloud API key
|
||||
MINIMAX_API_KEY - MiniMax API key
|
||||
OPENCODE_API_KEY - OpenCode Zen/OpenCode Go API key
|
||||
KIMI_API_KEY - Kimi For Coding API key
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ export const defaultModelPerProvider: Record<KnownProvider, string> = {
|
|||
"opencode-go": "kimi-k2.5",
|
||||
"kimi-coding": "kimi-k2-thinking",
|
||||
"alibaba-coding-plan": "qwen3.5-plus",
|
||||
"ollama-cloud": "qwen3:32b",
|
||||
};
|
||||
|
||||
export interface ScopedModel {
|
||||
|
|
|
|||
|
|
@ -73,6 +73,7 @@ const LLM_PROVIDER_IDS = [
|
|||
'xai',
|
||||
'openrouter',
|
||||
'mistral',
|
||||
'ollama-cloud',
|
||||
'custom-openai',
|
||||
]
|
||||
|
||||
|
|
@ -88,6 +89,7 @@ const OTHER_PROVIDERS = [
|
|||
{ value: 'xai', label: 'xAI (Grok)' },
|
||||
{ value: 'openrouter', label: 'OpenRouter' },
|
||||
{ value: 'mistral', label: 'Mistral' },
|
||||
{ value: 'ollama-cloud', label: 'Ollama Cloud' },
|
||||
{ value: 'custom-openai', label: 'Custom (OpenAI-compatible)' },
|
||||
]
|
||||
|
||||
|
|
@ -873,6 +875,7 @@ export function loadStoredEnvKeys(authStorage: AuthStorage): void {
|
|||
['slack_bot', 'SLACK_BOT_TOKEN'],
|
||||
['discord_bot', 'DISCORD_BOT_TOKEN'],
|
||||
['groq', 'GROQ_API_KEY'],
|
||||
['ollama-cloud', 'OLLAMA_API_KEY'],
|
||||
['custom-openai', 'CUSTOM_OPENAI_API_KEY'],
|
||||
]
|
||||
for (const [provider, envVar] of providers) {
|
||||
|
|
|
|||
|
|
@ -13,16 +13,18 @@ import type { AutocompleteItem } from '@gsd/pi-tui'
|
|||
import {
|
||||
getTavilyApiKey,
|
||||
getBraveApiKey,
|
||||
getOllamaApiKey,
|
||||
getSearchProviderPreference,
|
||||
setSearchProviderPreference,
|
||||
resolveSearchProvider,
|
||||
type SearchProviderPreference,
|
||||
} from './provider.ts'
|
||||
|
||||
const VALID_PREFERENCES: SearchProviderPreference[] = ['tavily', 'brave', 'auto']
|
||||
const VALID_PREFERENCES: SearchProviderPreference[] = ['tavily', 'brave', 'ollama', 'auto']
|
||||
|
||||
function keyStatus(provider: 'tavily' | 'brave'): string {
|
||||
function keyStatus(provider: 'tavily' | 'brave' | 'ollama'): string {
|
||||
if (provider === 'tavily') return getTavilyApiKey() ? '✓' : '✗'
|
||||
if (provider === 'ollama') return getOllamaApiKey() ? '✓' : '✗'
|
||||
return getBraveApiKey() ? '✓' : '✗'
|
||||
}
|
||||
|
||||
|
|
@ -30,6 +32,7 @@ function buildSelectOptions(): string[] {
|
|||
return [
|
||||
`tavily (key: ${keyStatus('tavily')})`,
|
||||
`brave (key: ${keyStatus('brave')})`,
|
||||
`ollama (key: ${keyStatus('ollama')})`,
|
||||
`auto`,
|
||||
]
|
||||
}
|
||||
|
|
@ -37,12 +40,13 @@ function buildSelectOptions(): string[] {
|
|||
function parseSelectChoice(choice: string): SearchProviderPreference {
|
||||
if (choice.startsWith('tavily')) return 'tavily'
|
||||
if (choice.startsWith('brave')) return 'brave'
|
||||
if (choice.startsWith('ollama')) return 'ollama'
|
||||
return 'auto'
|
||||
}
|
||||
|
||||
export function registerSearchProviderCommand(pi: ExtensionAPI): void {
|
||||
pi.registerCommand('search-provider', {
|
||||
description: 'Switch search provider (tavily, brave, auto)',
|
||||
description: 'Switch search provider (tavily, brave, ollama, auto)',
|
||||
|
||||
getArgumentCompletions(prefix: string): AutocompleteItem[] | null {
|
||||
const trimmed = prefix.trim().toLowerCase()
|
||||
|
|
@ -51,7 +55,7 @@ export function registerSearchProviderCommand(pi: ExtensionAPI): void {
|
|||
.map((p) => {
|
||||
let description: string
|
||||
if (p === 'auto') {
|
||||
description = `Auto-select (tavily: ${keyStatus('tavily')}, brave: ${keyStatus('brave')})`
|
||||
description = `Auto-select (tavily: ${keyStatus('tavily')}, brave: ${keyStatus('brave')}, ollama: ${keyStatus('ollama')})`
|
||||
} else {
|
||||
description = `key: ${keyStatus(p)}`
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,10 +18,10 @@ import { join } from 'path'
|
|||
// where the relative import '../../../app-paths.ts' doesn't resolve.
|
||||
const authFilePath = join(homedir(), '.gsd', 'agent', 'auth.json')
|
||||
|
||||
export type SearchProvider = 'tavily' | 'brave'
|
||||
export type SearchProvider = 'tavily' | 'brave' | 'ollama'
|
||||
export type SearchProviderPreference = SearchProvider | 'auto'
|
||||
|
||||
const VALID_PREFERENCES = new Set<string>(['tavily', 'brave', 'auto'])
|
||||
const VALID_PREFERENCES = new Set<string>(['tavily', 'brave', 'ollama', 'auto'])
|
||||
const PREFERENCE_KEY = 'search_provider'
|
||||
|
||||
/** Returns the Tavily API key from the environment, or empty string if not set. */
|
||||
|
|
@ -34,6 +34,11 @@ export function getBraveApiKey(): string {
|
|||
return process.env.BRAVE_API_KEY || ''
|
||||
}
|
||||
|
||||
/** Returns the Ollama API key from the environment, or empty string if not set. */
|
||||
export function getOllamaApiKey(): string {
|
||||
return process.env.OLLAMA_API_KEY || ''
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the user's search provider preference from auth.json.
|
||||
* Returns 'auto' if no preference is stored or the stored value is invalid.
|
||||
|
|
@ -78,9 +83,11 @@ export function setSearchProviderPreference(pref: SearchProviderPreference, auth
|
|||
export function resolveSearchProvider(overridePreference?: string): SearchProvider | null {
|
||||
const tavilyKey = getTavilyApiKey()
|
||||
const braveKey = getBraveApiKey()
|
||||
const ollamaKey = getOllamaApiKey()
|
||||
|
||||
const hasTavily = tavilyKey.length > 0
|
||||
const hasBrave = braveKey.length > 0
|
||||
const hasOllama = ollamaKey.length > 0
|
||||
|
||||
// Determine effective preference
|
||||
let pref: SearchProviderPreference
|
||||
|
|
@ -100,18 +107,28 @@ export function resolveSearchProvider(overridePreference?: string): SearchProvid
|
|||
if (pref === 'auto') {
|
||||
if (hasTavily) return 'tavily'
|
||||
if (hasBrave) return 'brave'
|
||||
if (hasOllama) return 'ollama'
|
||||
return null
|
||||
}
|
||||
|
||||
if (pref === 'tavily') {
|
||||
if (hasTavily) return 'tavily'
|
||||
if (hasBrave) return 'brave'
|
||||
if (hasOllama) return 'ollama'
|
||||
return null
|
||||
}
|
||||
|
||||
if (pref === 'brave') {
|
||||
if (hasBrave) return 'brave'
|
||||
if (hasTavily) return 'tavily'
|
||||
if (hasOllama) return 'ollama'
|
||||
return null
|
||||
}
|
||||
|
||||
if (pref === 'ollama') {
|
||||
if (hasOllama) return 'ollama'
|
||||
if (hasTavily) return 'tavily'
|
||||
if (hasBrave) return 'brave'
|
||||
return null
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import { LRUTTLCache } from "./cache.js";
|
|||
import { fetchSimple, HttpError } from "./http.js";
|
||||
import { extractDomain } from "./url-utils.js";
|
||||
import { formatPageContent, type FormatPageOptions } from "./format.js";
|
||||
import { getOllamaApiKey } from "./provider.js";
|
||||
|
||||
// =============================================================================
|
||||
// Cache
|
||||
|
|
@ -173,6 +174,43 @@ async function fetchDirectFallback(
|
|||
return { content: text, title, contentType };
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Ollama Web Fetch
|
||||
// =============================================================================
|
||||
|
||||
interface OllamaWebFetchResponse {
|
||||
title?: string;
|
||||
content?: string;
|
||||
links?: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch page content via Ollama web_fetch API.
|
||||
* Returns content + metadata, or throws on failure.
|
||||
*/
|
||||
async function fetchViaOllama(
|
||||
url: string,
|
||||
signal?: AbortSignal,
|
||||
): Promise<{ content: string; title?: string }> {
|
||||
const response = await fetchSimple("https://ollama.com/api/web_fetch", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": `Bearer ${getOllamaApiKey()}`,
|
||||
},
|
||||
body: JSON.stringify({ url }),
|
||||
signal,
|
||||
timeoutMs: 20_000,
|
||||
});
|
||||
|
||||
const data: OllamaWebFetchResponse = await response.json();
|
||||
|
||||
const content = (data.content || "").trim();
|
||||
const title = data.title?.trim() || undefined;
|
||||
|
||||
return { content, title };
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Smart Truncation
|
||||
// =============================================================================
|
||||
|
|
@ -252,6 +290,30 @@ async function fetchOnePage(
|
|||
jinaError = err instanceof HttpError
|
||||
? `Jina HTTP ${err.statusCode}`
|
||||
: (err as Error).message ?? String(err);
|
||||
|
||||
// Try Ollama web_fetch as intermediate fallback if API key is available
|
||||
const ollamaKey = getOllamaApiKey();
|
||||
if (ollamaKey) {
|
||||
try {
|
||||
const ollamaResult = await fetchViaOllama(url, options.signal);
|
||||
if (ollamaResult.content && ollamaResult.content.length >= 50) {
|
||||
pageContent = ollamaResult.content;
|
||||
pageTitle = ollamaResult.title;
|
||||
source = "direct";
|
||||
return {
|
||||
content: pageContent,
|
||||
title: pageTitle,
|
||||
source,
|
||||
jinaError,
|
||||
contentType,
|
||||
originalChars: pageContent.length,
|
||||
};
|
||||
}
|
||||
} catch {
|
||||
// Ollama fetch failed too — fall through to direct
|
||||
}
|
||||
}
|
||||
|
||||
source = "direct";
|
||||
|
||||
const result = await fetchDirectFallback(url, options.signal);
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ import { normalizeQuery, extractDomain } from "./url-utils.js";
|
|||
import { formatLLMContext, type LLMContextSnippet, type LLMContextSource } from "./format.js";
|
||||
import type { TavilyResult, TavilySearchResponse } from "./tavily.js";
|
||||
import { publishedDateToAge } from "./tavily.js";
|
||||
import { getTavilyApiKey, resolveSearchProvider } from "./provider.js";
|
||||
import { getTavilyApiKey, getOllamaApiKey, resolveSearchProvider } from "./provider.js";
|
||||
|
||||
// =============================================================================
|
||||
// Types
|
||||
|
|
@ -79,7 +79,7 @@ interface LLMContextDetails {
|
|||
errorKind?: string;
|
||||
error?: string;
|
||||
retryAfterMs?: number;
|
||||
provider?: 'tavily' | 'brave';
|
||||
provider?: 'tavily' | 'brave' | 'ollama';
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
|
|
@ -230,6 +230,57 @@ async function executeTavilyLLMContext(
|
|||
return { cached, latencyMs: timed.latencyMs, rateLimit: timed.rateLimit };
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Ollama LLM Context Execution
|
||||
// =============================================================================
|
||||
|
||||
interface OllamaWebSearchResult {
|
||||
title: string;
|
||||
url: string;
|
||||
content: string;
|
||||
}
|
||||
|
||||
interface OllamaWebSearchResponse {
|
||||
results: OllamaWebSearchResult[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a search_and_read query against the Ollama web_search API.
|
||||
*
|
||||
* Uses the same web_search endpoint as tool-search, then applies
|
||||
* budgetContent() for client-side token budgeting (similar to Tavily path).
|
||||
*/
|
||||
async function executeOllamaLLMContext(
|
||||
params: { query: string; maxTokens: number; count: number; threshold: string },
|
||||
signal?: AbortSignal,
|
||||
): Promise<{ cached: CachedLLMContext; latencyMs: number; rateLimit?: RateLimitInfo }> {
|
||||
const scoreThreshold = THRESHOLD_TO_SCORE[params.threshold] ?? 0.5;
|
||||
|
||||
const timed = await fetchWithRetryTimed("https://ollama.com/api/web_search", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": `Bearer ${getOllamaApiKey()}`,
|
||||
},
|
||||
body: JSON.stringify({ query: params.query, max_results: params.count }),
|
||||
signal,
|
||||
}, 2);
|
||||
|
||||
const data: OllamaWebSearchResponse = await timed.response.json();
|
||||
|
||||
// Convert Ollama results to TavilyResult-compatible format for budgetContent
|
||||
const tavilyLikeResults: TavilyResult[] = (data.results || []).map(r => ({
|
||||
title: r.title || "(untitled)",
|
||||
url: r.url,
|
||||
content: r.content || "",
|
||||
score: 1.0, // Ollama doesn't provide scores, assume all are relevant
|
||||
}));
|
||||
|
||||
const cached = budgetContent(tavilyLikeResults, params.maxTokens, scoreThreshold);
|
||||
|
||||
return { cached, latencyMs: timed.latencyMs, rateLimit: timed.rateLimit };
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Tool Registration
|
||||
// =============================================================================
|
||||
|
|
@ -295,7 +346,7 @@ export function registerLLMContextTool(pi: ExtensionAPI) {
|
|||
const provider = resolveSearchProvider();
|
||||
if (!provider) {
|
||||
return {
|
||||
content: [{ type: "text", text: "search_and_read unavailable: No search API key is set. Use secure_env_collect to set TAVILY_API_KEY or BRAVE_API_KEY." }],
|
||||
content: [{ type: "text", text: "search_and_read unavailable: No search API key is set. Use secure_env_collect to set TAVILY_API_KEY, BRAVE_API_KEY, or OLLAMA_API_KEY." }],
|
||||
isError: true,
|
||||
details: { errorKind: "auth_error", error: "No search API key set" } satisfies Partial<LLMContextDetails>,
|
||||
};
|
||||
|
|
@ -358,6 +409,14 @@ export function registerLLMContextTool(pi: ExtensionAPI) {
|
|||
result = tavilyResult.cached;
|
||||
latencyMs = tavilyResult.latencyMs;
|
||||
rateLimit = tavilyResult.rateLimit;
|
||||
} else if (provider === "ollama") {
|
||||
const ollamaResult = await executeOllamaLLMContext(
|
||||
{ query: params.query, maxTokens, count, threshold },
|
||||
signal,
|
||||
);
|
||||
result = ollamaResult.cached;
|
||||
latencyMs = ollamaResult.latencyMs;
|
||||
rateLimit = ollamaResult.rateLimit;
|
||||
} else {
|
||||
// ================================================================
|
||||
// BRAVE PATH (unchanged API logic)
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ import { LRUTTLCache } from "./cache.js";
|
|||
import { fetchWithRetryTimed, fetchWithRetry, classifyError, type RateLimitInfo } from "./http.js";
|
||||
import { normalizeQuery, toDedupeKey, detectFreshness } from "./url-utils.js";
|
||||
import { formatSearchResults, type SearchResultFormatted, type FormatSearchOptions } from "./format.js";
|
||||
import { getTavilyApiKey, resolveSearchProvider } from "./provider.js";
|
||||
import { getTavilyApiKey, getOllamaApiKey, resolveSearchProvider } from "./provider.js";
|
||||
import { normalizeTavilyResult, mapFreshnessToTavily, type TavilySearchResponse } from "./tavily.js";
|
||||
|
||||
// =============================================================================
|
||||
|
|
@ -93,7 +93,7 @@ interface SearchDetails {
|
|||
errorKind?: string;
|
||||
error?: string;
|
||||
retryAfterMs?: number;
|
||||
provider?: 'tavily' | 'brave';
|
||||
provider?: 'tavily' | 'brave' | 'ollama';
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
|
|
@ -245,6 +245,57 @@ async function executeTavilySearch(
|
|||
};
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Ollama API execution
|
||||
// =============================================================================
|
||||
|
||||
interface OllamaWebSearchResult {
|
||||
title: string;
|
||||
url: string;
|
||||
content: string;
|
||||
}
|
||||
|
||||
interface OllamaWebSearchResponse {
|
||||
results: OllamaWebSearchResult[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a search against the Ollama web_search API.
|
||||
* Returns a CachedSearchResult with normalized, deduplicated results.
|
||||
*/
|
||||
async function executeOllamaSearch(
|
||||
params: { query: string; count: number },
|
||||
signal?: AbortSignal
|
||||
): Promise<{ results: CachedSearchResult; latencyMs: number; rateLimit?: RateLimitInfo }> {
|
||||
const timed = await fetchWithRetryTimed("https://ollama.com/api/web_search", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": `Bearer ${getOllamaApiKey()}`,
|
||||
},
|
||||
body: JSON.stringify({ query: params.query, max_results: params.count }),
|
||||
signal,
|
||||
}, 2);
|
||||
|
||||
const data: OllamaWebSearchResponse = await timed.response.json();
|
||||
const normalized: SearchResultFormatted[] = (data.results || []).map(r => ({
|
||||
title: r.title || "(untitled)",
|
||||
url: r.url,
|
||||
description: r.content || "",
|
||||
}));
|
||||
const deduplicated = deduplicateResults(normalized);
|
||||
|
||||
return {
|
||||
results: {
|
||||
results: deduplicated,
|
||||
queryCorrected: false,
|
||||
moreResultsAvailable: false,
|
||||
},
|
||||
latencyMs: timed.latencyMs,
|
||||
rateLimit: timed.rateLimit,
|
||||
};
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Tool Registration
|
||||
// =============================================================================
|
||||
|
|
@ -300,7 +351,7 @@ export function registerSearchTool(pi: ExtensionAPI) {
|
|||
const provider = resolveSearchProvider();
|
||||
if (!provider) {
|
||||
return {
|
||||
content: [{ type: "text", text: "Web search unavailable: No search API key is set. Use secure_env_collect to set TAVILY_API_KEY or BRAVE_API_KEY." }],
|
||||
content: [{ type: "text", text: "Web search unavailable: No search API key is set. Use secure_env_collect to set TAVILY_API_KEY, BRAVE_API_KEY, or OLLAMA_API_KEY." }],
|
||||
isError: true,
|
||||
details: { errorKind: "auth_error", error: "No search API key set" } satisfies Partial<SearchDetails>,
|
||||
};
|
||||
|
|
@ -405,6 +456,14 @@ export function registerSearchTool(pi: ExtensionAPI) {
|
|||
searchResult = tavilyResult.results;
|
||||
latencyMs = tavilyResult.latencyMs;
|
||||
rateLimit = tavilyResult.rateLimit;
|
||||
} else if (provider === "ollama") {
|
||||
const ollamaResult = await executeOllamaSearch(
|
||||
{ query: params.query, count: 10 },
|
||||
signal
|
||||
);
|
||||
searchResult = ollamaResult.results;
|
||||
latencyMs = ollamaResult.latencyMs;
|
||||
rateLimit = ollamaResult.rateLimit;
|
||||
} else {
|
||||
// ================================================================
|
||||
// BRAVE PATH (unchanged API logic)
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ test('resolveSearchProvider returns null when no search API key is set', async () => {
|
|||
const { resolveSearchProvider } = await import(
|
||||
'../resources/extensions/search-the-web/provider.ts'
|
||||
)
|
||||
withEnv({ TAVILY_API_KEY: undefined, BRAVE_API_KEY: undefined }, () => {
|
||||
withEnv({ TAVILY_API_KEY: undefined, BRAVE_API_KEY: undefined, OLLAMA_API_KEY: undefined }, () => {
|
||||
const result = resolveSearchProvider('auto')
|
||||
assert.equal(result, null)
|
||||
})
|
||||
|
|
@ -245,7 +245,7 @@ test('getBraveApiKey reads from process.env.BRAVE_API_KEY', async () => {
|
|||
// 4. Boundary contract — S01→S02 public API surface
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
test('provider.ts exports exactly the 5 expected functions', async () => {
|
||||
test('provider.ts exports exactly the 6 expected functions', async () => {
|
||||
const provider = await import(
|
||||
'../resources/extensions/search-the-web/provider.ts'
|
||||
)
|
||||
|
|
@ -254,6 +254,7 @@ test('provider.ts exports exactly the 5 expected functions', async () => {
|
|||
'resolveSearchProvider',
|
||||
'getTavilyApiKey',
|
||||
'getBraveApiKey',
|
||||
'getOllamaApiKey',
|
||||
'getSearchProviderPreference',
|
||||
'setSearchProviderPreference',
|
||||
] as const
|
||||
|
|
@ -270,6 +271,6 @@ test('provider.ts exports exactly the 5 expected functions', async () => {
|
|||
assert.deepEqual(
|
||||
actualFunctions.sort(),
|
||||
[...expectedExports].sort(),
|
||||
'provider.ts should export exactly the 5 expected functions (no extra function exports)',
|
||||
'provider.ts should export exactly the 6 expected functions (no extra function exports)',
|
||||
)
|
||||
})
|
||||
|
|
|
|||
|
|
@ -206,12 +206,13 @@ test('no arg shows select UI with 4 options, user picks brave', async () => {
|
|||
|
||||
// Select UI shown
|
||||
assert.equal(ctx.ui.selectCalls.length, 1, 'should show select UI')
|
||||
assert.equal(ctx.ui.selectCalls[0].options.length, 3)
|
||||
assert.equal(ctx.ui.selectCalls[0].options.length, 4)
|
||||
|
||||
// Options show key status
|
||||
assert.match(ctx.ui.selectCalls[0].options[0], /tavily \(key: ✓\)/)
|
||||
assert.match(ctx.ui.selectCalls[0].options[1], /brave \(key: ✓\)/)
|
||||
assert.equal(ctx.ui.selectCalls[0].options[2], 'auto')
|
||||
assert.match(ctx.ui.selectCalls[0].options[2], /ollama \(key:/)
|
||||
assert.equal(ctx.ui.selectCalls[0].options[3], 'auto')
|
||||
|
||||
// Title shows current preference
|
||||
assert.match(ctx.ui.selectCalls[0].title, /current:/)
|
||||
|
|
@ -273,16 +274,16 @@ test('invalid arg "google" falls back to interactive select', async () => {
|
|||
// 7. Tab completion — all 4 options when prefix is empty
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
test('tab completion returns all 3 options when prefix is empty', async () => {
|
||||
test('tab completion returns all 4 options when prefix is empty', async () => {
|
||||
const cmd = await loadCommand()
|
||||
|
||||
withEnv({ TAVILY_API_KEY: 'tvly-test', BRAVE_API_KEY: 'BSA-test' }, () => {
|
||||
const items = cmd.getArgumentCompletions!('')
|
||||
assert.ok(items, 'completions should not be null')
|
||||
assert.equal(items!.length, 3)
|
||||
assert.equal(items!.length, 4)
|
||||
|
||||
const values = items!.map((i: any) => i.value)
|
||||
assert.deepEqual(values, ['tavily', 'brave', 'auto'])
|
||||
assert.deepEqual(values, ['tavily', 'brave', 'ollama', 'auto'])
|
||||
|
||||
// Each item has label and description
|
||||
for (const item of items!) {
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ export function loadStoredEnvKeys(authStorage: AuthStorage): void {
|
|||
['slack_bot', 'SLACK_BOT_TOKEN'],
|
||||
['discord_bot', 'DISCORD_BOT_TOKEN'],
|
||||
['groq', 'GROQ_API_KEY'],
|
||||
['ollama-cloud', 'OLLAMA_API_KEY'],
|
||||
]
|
||||
for (const [provider, envVar] of providers) {
|
||||
if (!process.env[envVar]) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue