From 00c6442e1a5f7980c7c56ec24653041b9a5093b4 Mon Sep 17 00:00:00 2001 From: Luan Neves Barroso <34319015+luannevesb@users.noreply.github.com> Date: Mon, 13 Apr 2026 09:03:57 -0300 Subject: [PATCH] fix(ollama): add cloud auth support and resolve real context window via /api/show (#4017) - Add OLLAMA_API_KEY Bearer token auth to all Ollama HTTP client requests (fetchWithTimeout, pullModel, chat) via getAuthHeaders/withAuth helpers. Local Ollama ignores the Authorization header; cloud endpoints require it. - Fix isRunning() probe for cloud endpoints: use /api/tags instead of root / since cloud hosts may not serve the root endpoint. - Resolve real context window for unknown models via /api/show model_info ({arch}.context_length) instead of defaulting to 8192. Priority chain: known table > /api/show > estimate from parameter_size > 8192. - Use dependency injection for discoverModels() to allow test mocking without ESM named export issues. - Pick up OLLAMA_API_KEY in provider registration (apiKey field). Closes #3544 Co-authored-by: luannevesb Co-authored-by: Claude Opus 4.6 --- src/resources/extensions/ollama/index.ts | 9 ++-- .../extensions/ollama/ollama-client.ts | 41 +++++++++++--- .../extensions/ollama/ollama-discovery.ts | 43 ++++++++++++--- .../ollama/tests/ollama-discovery.test.ts | 54 +++++++++++++++++++ 4 files changed, 130 insertions(+), 17 deletions(-) diff --git a/src/resources/extensions/ollama/index.ts b/src/resources/extensions/ollama/index.ts index 8ea39d683..6934f4c26 100644 --- a/src/resources/extensions/ollama/index.ts +++ b/src/resources/extensions/ollama/index.ts @@ -69,13 +69,12 @@ async function probeAndRegister(pi: ExtensionAPI): Promise { const baseUrl = client.getOllamaHost(); - // Use authMode "apiKey" with a dummy key (#3440). - // authMode "none" requires a custom streamSimple handler, but Ollama uses - // the standard OpenAI-compatible streaming endpoint. Ollama ignores the - // Authorization header so the dummy key is harmless. 
+ // Use authMode "apiKey" (#3440). Local Ollama ignores the Authorization header, + // so the "ollama" fallback is harmless. For cloud endpoints (OLLAMA_HOST pointing + // to ollama.com or a remote instance), OLLAMA_API_KEY is picked up here. pi.registerProvider("ollama", { authMode: "apiKey", - apiKey: "ollama", + apiKey: process.env.OLLAMA_API_KEY ?? "ollama", baseUrl, api: "ollama-chat", streamSimple: streamOllamaChat, diff --git a/src/resources/extensions/ollama/ollama-client.ts b/src/resources/extensions/ollama/ollama-client.ts index 4738c09da..2408215fd 100644 --- a/src/resources/extensions/ollama/ollama-client.ts +++ b/src/resources/extensions/ollama/ollama-client.ts @@ -34,11 +34,34 @@ export function getOllamaHost(): string { return `http://${host}`; } +/** + * Get auth headers for Ollama API requests. + * For cloud endpoints (OLLAMA_HOST pointing to ollama.com or remote instances), + * OLLAMA_API_KEY is used as a Bearer token. Local Ollama ignores the header. + */ +function getAuthHeaders(): Record<string, string> { + const apiKey = process.env.OLLAMA_API_KEY; + if (!apiKey) return {}; + return { Authorization: `Bearer ${apiKey}` }; +} + +/** + * Merge auth headers into request options. 
+ */ +function withAuth(options: RequestInit = {}): RequestInit { + const authHeaders = getAuthHeaders(); + if (Object.keys(authHeaders).length === 0) return options; + return { + ...options, + headers: { ...authHeaders, ...(options.headers as Record<string, string> || {}) }, + }; +} + async function fetchWithTimeout(url: string, options: RequestInit = {}, timeoutMs = REQUEST_TIMEOUT_MS): Promise<Response> { const controller = new AbortController(); const timeout = setTimeout(() => controller.abort(), timeoutMs); try { - return await fetch(url, { ...options, signal: controller.signal }); + return await fetch(url, withAuth({ ...options, signal: controller.signal })); } finally { clearTimeout(timeout); } @@ -46,10 +69,16 @@ async function fetchWithTimeout(url: string, options: RequestInit = {}, timeoutM /** * Check if Ollama is running and reachable. + * For cloud endpoints (OLLAMA_HOST pointing to ollama.com), uses /api/tags + * as the probe since the root endpoint may not be available. */ export async function isRunning(): Promise<boolean> { try { - const response = await fetchWithTimeout(`${getOllamaHost()}/`, {}, PROBE_TIMEOUT_MS); + const host = getOllamaHost(); + const isCloud = host.includes("ollama.com") || host.includes("cloud"); + const probeUrl = isCloud ? `${host}/api/tags` : `${host}/`; + const timeout = isCloud ? REQUEST_TIMEOUT_MS : PROBE_TIMEOUT_MS; + const response = await fetchWithTimeout(probeUrl, isCloud ? 
{ method: "GET" } : {}, timeout); return response.ok; } catch { return false; @@ -117,12 +146,12 @@ export async function pullModel( onProgress?: (progress: OllamaPullProgress) => void, signal?: AbortSignal, ): Promise<void> { - const response = await fetch(`${getOllamaHost()}/api/pull`, { + const response = await fetch(`${getOllamaHost()}/api/pull`, withAuth({ method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ name, stream: true }), signal, - }); + })); if (!response.ok) { const text = await response.text(); @@ -146,12 +175,12 @@ export async function* chat( request: OllamaChatRequest, signal?: AbortSignal, ): AsyncGenerator { - const response = await fetch(`${getOllamaHost()}/api/chat`, { + const response = await fetch(`${getOllamaHost()}/api/chat`, withAuth({ method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(request), signal, - }); + })); if (!response.ok) { const text = await response.text(); diff --git a/src/resources/extensions/ollama/ollama-discovery.ts b/src/resources/extensions/ollama/ollama-discovery.ts index 29fb1bc77..bc2105b74 100644 --- a/src/resources/extensions/ollama/ollama-discovery.ts +++ b/src/resources/extensions/ollama/ollama-discovery.ts @@ -8,7 +8,7 @@ * Returns models in the format expected by pi.registerProvider(). */ -import { listModels } from "./ollama-client.js"; +import { listModels, showModel } from "./ollama-client.js"; import { estimateContextFromParams, formatModelSize, @@ -17,6 +17,24 @@ import { } from "./model-capabilities.js"; import type { OllamaChatOptions, OllamaModelInfo } from "./types.js"; +/** + * Extract context window from /api/show model_info. + * Keys follow the pattern "{architecture}.context_length" (e.g. "llama.context_length"). 
+ */ +function extractContextFromModelInfo(modelInfo: Record<string, unknown>): number | undefined { + for (const [key, value] of Object.entries(modelInfo)) { + if (key.endsWith(".context_length") && typeof value === "number" && value > 0) { + return value; + } + } + return undefined; +} + +type ClientDeps = { + listModels: typeof listModels; + showModel: typeof showModel; +}; + export interface DiscoveredOllamaModel { id: string; name: string; @@ -35,13 +53,26 @@ export interface DiscoveredOllamaModel { const ZERO_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }; -function enrichModel(info: OllamaModelInfo): DiscoveredOllamaModel { +async function enrichModel(info: OllamaModelInfo, deps: ClientDeps): Promise<DiscoveredOllamaModel> { const caps = getModelCapabilities(info.name); const parameterSize = info.details?.parameter_size ?? ""; - // Determine context window: known table > estimate from param size > default + // /api/tags doesn't include context length; /api/show does via "{arch}.context_length" in model_info. + let showContextWindow: number | undefined; + if (caps.contextWindow === undefined) { + try { + const showData = await deps.showModel(info.name); + showContextWindow = extractContextFromModelInfo(showData.model_info); + } catch (err) { + // non-fatal: fall through to estimate + if (process.env.GSD_DEBUG) console.warn(`[ollama] /api/show failed for ${info.name}:`, err instanceof Error ? err.message : String(err)); + } + } + + // Determine context window: known table > /api/show > estimate from param size > default const contextWindow = caps.contextWindow ?? + showContextWindow ?? (parameterSize ? estimateContextFromParams(parameterSize) : 8192); // Determine max tokens: known table > fraction of context > default @@ -73,11 +104,11 @@ function enrichModel(info: OllamaModelInfo): DiscoveredOllamaModel { /** * Discover all locally available Ollama models with enriched capabilities. 
*/ -export async function discoverModels(): Promise<DiscoveredOllamaModel[]> { - const tags = await listModels(); +export async function discoverModels(deps: ClientDeps = { listModels, showModel }): Promise<DiscoveredOllamaModel[]> { + const tags = await deps.listModels(); if (!tags.models || tags.models.length === 0) return []; - return tags.models.map(enrichModel); + return Promise.all(tags.models.map((m) => enrichModel(m, deps))); } /** diff --git a/src/resources/extensions/ollama/tests/ollama-discovery.test.ts b/src/resources/extensions/ollama/tests/ollama-discovery.test.ts index a228bf663..02d582d19 100644 --- a/src/resources/extensions/ollama/tests/ollama-discovery.test.ts +++ b/src/resources/extensions/ollama/tests/ollama-discovery.test.ts @@ -1 +1,55 @@ // GSD2 — Tests for Ollama model discovery and enrichment +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { discoverModels } from "../ollama-discovery.js"; +import type { OllamaTagsResponse, OllamaShowResponse } from "../types.js"; + +const EMPTY_DETAILS = { parent_model: "", format: "", family: "", families: null, parameter_size: "", quantization_level: "" }; + +function modelStub(name: string, parameterSize = "") { return { name, model: name, modified_at: "", size: 0, digest: "", details: { ...EMPTY_DETAILS, parameter_size: parameterSize } }; } + +function tagsStub(name: string, parameterSize = ""): OllamaTagsResponse { return { models: [modelStub(name, parameterSize)] }; } + +function showStub(modelInfo: Record<string, unknown>): OllamaShowResponse { return { modelfile: "", parameters: "", template: "", details: EMPTY_DETAILS, model_info: modelInfo }; } + +describe("discoverModels — context window resolution", () => { + it("uses known table context window without calling /api/show", async () => { + let showCalled = false; + const models = await discoverModels({ + listModels: async () => tagsStub("llama3.2:latest", "3B"), + showModel: async () => { showCalled = true; throw new Error("should not be called"); }, + }); + 
assert.equal(models[0].contextWindow, 131072); + assert.equal(showCalled, false); + }); + + it("uses context_length from /api/show model_info for unknown model", async () => { + const models = await discoverModels({ + listModels: async () => tagsStub("gemini-3-flash-preview:latest"), + showModel: async () => showStub({ "gemini.context_length": 1048576 }), + }); + assert.equal(models[0].contextWindow, 1048576); + }); + + it("falls back to 8192 when /api/show model_info has no context_length key", async () => { + const models = await discoverModels({ + listModels: async () => tagsStub("unknown-model:latest"), + showModel: async () => showStub({}), + }); + assert.equal(models[0].contextWindow, 8192); + }); + + it("falls back to 8192 when /api/show throws", async () => { + const models = await discoverModels({ + listModels: async () => tagsStub("unknown-model:latest"), + showModel: async () => { throw new Error("network error"); }, + }); + assert.equal(models[0].contextWindow, 8192); + }); +}); \ No newline at end of file