diff --git a/packages/pi-coding-agent/src/core/model-discovery.test.ts b/packages/pi-coding-agent/src/core/model-discovery.test.ts index 43a35a7a3..39d488873 100644 --- a/packages/pi-coding-agent/src/core/model-discovery.test.ts +++ b/packages/pi-coding-agent/src/core/model-discovery.test.ts @@ -22,6 +22,13 @@ describe("getDiscoveryAdapter", () => { assert.equal(adapter.supportsDiscovery, true); }); + it("returns an adapter for ollama-cloud", () => { + const adapter = getDiscoveryAdapter("ollama-cloud"); + assert.equal(adapter.provider, "ollama-cloud"); + assert.equal(adapter.supportsDiscovery, true); + assert.equal(adapter.requiresAuthForDiscovery, false); + }); + it("returns an adapter for openrouter", () => { const adapter = getDiscoveryAdapter("openrouter"); assert.equal(adapter.provider, "openrouter"); @@ -66,6 +73,7 @@ describe("getDiscoverableProviders", () => { const providers = getDiscoverableProviders(); assert.ok(providers.includes("openai")); assert.ok(providers.includes("ollama")); + assert.ok(providers.includes("ollama-cloud")); assert.ok(providers.includes("openrouter")); assert.ok(providers.includes("google")); assert.ok(!providers.includes("anthropic")); @@ -92,6 +100,10 @@ describe("getDefaultTTL", () => { assert.equal(getDefaultTTL("openai"), 60 * 60 * 1000); }); + it("returns 1 hour for ollama-cloud", () => { + assert.equal(getDefaultTTL("ollama-cloud"), 60 * 60 * 1000); + }); + it("returns 1 hour for google", () => { assert.equal(getDefaultTTL("google"), 60 * 60 * 1000); }); @@ -110,6 +122,7 @@ describe("getDefaultTTL", () => { describe("DISCOVERY_TTLS", () => { it("has expected keys", () => { assert.ok("ollama" in DISCOVERY_TTLS); + assert.ok("ollama-cloud" in DISCOVERY_TTLS); assert.ok("openai" in DISCOVERY_TTLS); assert.ok("google" in DISCOVERY_TTLS); assert.ok("openrouter" in DISCOVERY_TTLS); @@ -123,3 +136,43 @@ describe("DISCOVERY_TTLS", () => { } }); }); + +// ─── Ollama Cloud Adapter ─────────────────────────────────────────────────── + +describe("ollama-cloud discovery", () => { + it("uses the live OpenAI-compatible /v1/models endpoint", async () => { + const originalFetch = globalThis.fetch; + const calls: Array<{ url: string; headers?: HeadersInit }> = []; + globalThis.fetch = (async ( + input: string | URL | Request, + init?: RequestInit, + ) => { + calls.push({ url: String(input), headers: init?.headers }); + return new Response( + JSON.stringify({ + data: [ + { id: "kimi-k2.5", object: "model", owned_by: "ollama" }, + { id: "kimi-k2.6", object: "model", owned_by: "ollama" }, + ], + }), + { status: 200 }, + ); + }) as typeof fetch; + + try { + const adapter = getDiscoveryAdapter("ollama-cloud"); + const models = await adapter.fetchModels("test-key"); + + assert.equal(calls[0]?.url, "https://ollama.com/v1/models"); + assert.deepEqual(calls[0]?.headers, { + Authorization: "Bearer test-key", + }); + assert.deepEqual( + models.map((m) => m.id), + ["kimi-k2.5", "kimi-k2.6"], + ); + } finally { + globalThis.fetch = originalFetch; + } + }); +}); diff --git a/packages/pi-coding-agent/src/core/model-discovery.ts b/packages/pi-coding-agent/src/core/model-discovery.ts index 7e8ce3372..49acf52f8 100644 --- a/packages/pi-coding-agent/src/core/model-discovery.ts +++ b/packages/pi-coding-agent/src/core/model-discovery.ts @@ -23,12 +23,14 @@ export interface DiscoveryResult { export interface ProviderDiscoveryAdapter { provider: string; supportsDiscovery: boolean; + requiresAuthForDiscovery?: boolean; fetchModels(apiKey: string, baseUrl?: string): Promise; } /** Per-provider TTLs in milliseconds */ export const DISCOVERY_TTLS: Record = { ollama: 5 * 60 * 1000, // 5 minutes (local, models change often) + "ollama-cloud": 60 * 60 * 1000, // 1 hour openai: 60 * 60 * 1000, // 1 hour google: 60 * 60 * 1000, // 1 hour openrouter: 60 * 60 * 1000, // 1 hour @@ -104,6 +106,35 @@ class OllamaDiscoveryAdapter implements ProviderDiscoveryAdapter { } } +// ─── Ollama Cloud Adapter ──────────────────────────────────────────────────── + +class OllamaCloudDiscoveryAdapter implements ProviderDiscoveryAdapter { + provider = "ollama-cloud"; + supportsDiscovery = true; + requiresAuthForDiscovery = false; + + async fetchModels(apiKey: string, baseUrl?: string): Promise { + const root = (baseUrl ?? "https://ollama.com").replace(/\/+$/, ""); + const url = root.endsWith("/v1") ? `${root}/models` : `${root}/v1/models`; + const headers = apiKey ? { Authorization: `Bearer ${apiKey}` } : undefined; + const response = await fetchWithTimeout(url, { headers }); + + if (!response.ok) { + throw new Error(`Ollama Cloud models API returned ${response.status}: ${response.statusText}`); + } + + const data = (await response.json()) as { + data: Array<{ id: string; name?: string; owned_by?: string }>; + }; + + return (data.data ?? []).map((m) => ({ + id: m.id, + name: m.name ?? m.id, + input: ["text" as const], + })); + } +} + // ─── OpenRouter Adapter ────────────────────────────────────────────────────── class OpenRouterDiscoveryAdapter implements ProviderDiscoveryAdapter { @@ -209,6 +240,7 @@ class StaticDiscoveryAdapter implements ProviderDiscoveryAdapter { const adapters: Record = { openai: new OpenAIDiscoveryAdapter(), ollama: new OllamaDiscoveryAdapter(), + "ollama-cloud": new OllamaCloudDiscoveryAdapter(), openrouter: new OpenRouterDiscoveryAdapter(), google: new GoogleDiscoveryAdapter(), anthropic: new StaticDiscoveryAdapter("anthropic"), diff --git a/packages/pi-coding-agent/src/core/model-registry-discovery.test.ts b/packages/pi-coding-agent/src/core/model-registry-discovery.test.ts index 223c5b471..84886484b 100644 --- a/packages/pi-coding-agent/src/core/model-registry-discovery.test.ts +++ b/packages/pi-coding-agent/src/core/model-registry-discovery.test.ts @@ -1,16 +1,24 @@ import assert from "node:assert/strict"; -import { mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { mkdirSync, rmSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { afterEach, beforeEach, describe, it } from "node:test"; import { AuthStorage } from "./auth-storage.js"; import { ModelDiscoveryCache } from "./discovery-cache.js"; -import { getDefaultTTL, getDiscoverableProviders, getDiscoveryAdapter } from "./model-discovery.js"; +import { + getDefaultTTL, + getDiscoverableProviders, + getDiscoveryAdapter, +} from "./model-discovery.js"; +import { ModelRegistry } from "./model-registry.js"; let testDir: string; beforeEach(() => { - testDir = join(tmpdir(), `model-registry-discovery-test-${Date.now()}-${Math.random().toString(36).slice(2)}`); + testDir = join( + tmpdir(), + `model-registry-discovery-test-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); mkdirSync(testDir, { recursive: true }); }); @@ -53,15 +61,29 @@ describe("Discovery adapter resolution", () => { const providers = getDiscoverableProviders(); for (const provider of providers) { const adapter = getDiscoveryAdapter(provider); - assert.equal(adapter.supportsDiscovery, true, `${provider} should support discovery`); + assert.equal( + adapter.supportsDiscovery, + true, + `${provider} should support discovery`, + ); } }); it("static adapters return empty model lists", async () => { - const staticProviders = ["anthropic", "bedrock", "azure-openai", "groq", "cerebras"]; + const staticProviders = [ + "anthropic", + "bedrock", + "azure-openai", + "groq", + "cerebras", + ]; for (const provider of staticProviders) { const adapter = getDiscoveryAdapter(provider); - assert.equal(adapter.supportsDiscovery, false, `${provider} should not support discovery`); + assert.equal( + adapter.supportsDiscovery, + false, + `${provider} should not support discovery`, + ); const models = await adapter.fetchModels("dummy-key"); assert.deepEqual(models, [], `${provider} should return empty models`); } @@ -86,6 +108,40 @@ describe("AuthStorage — hasAuth for discovery providers", () => { }); }); +// ─── public model-list discovery ───────────────────────────────────────────── + +describe("ModelRegistry — public discovery providers", () => { + it("discovers ollama-cloud models from live model listing without stored auth", async () => { + const originalFetch = globalThis.fetch; + globalThis.fetch = (async () => + new Response( + JSON.stringify({ + data: [{ id: "kimi-k2.5" }, { id: "kimi-k2.6" }], + }), + { status: 200 }, + )) as typeof fetch; + + try { + const registry = new ModelRegistry(AuthStorage.inMemory({}), undefined); + const results = await registry.discoverModels(["ollama-cloud"]); + + assert.equal(results[0]?.provider, "ollama-cloud"); + assert.deepEqual( + results[0]?.models.map((m) => m.id), + ["kimi-k2.5", "kimi-k2.6"], + ); + assert.ok( + registry + .getAllWithDiscovered() + .some((m) => m.provider === "ollama-cloud" && m.id === "kimi-k2.6"), + "discovered Kimi K2.6 is retained as ollama-cloud/kimi-k2.6", + ); + } finally { + globalThis.fetch = originalFetch; + } + }); +}); + // ─── cache persistence across instances ────────────────────────────────────── describe("ModelDiscoveryCache — persistence", () => { @@ -123,7 +179,10 @@ describe("Discovery TTL configuration", () => { it("ollama has shortest TTL (local models change often)", () => { const ollamaTTL = getDefaultTTL("ollama"); const openaiTTL = getDefaultTTL("openai"); - assert.ok(ollamaTTL < openaiTTL, "ollama TTL should be shorter than openai"); + assert.ok( + ollamaTTL < openaiTTL, + "ollama TTL should be shorter than openai", + ); }); it("unknown providers get default TTL", () => { diff --git a/packages/pi-coding-agent/src/core/model-registry.ts b/packages/pi-coding-agent/src/core/model-registry.ts index 579b868d6..50dec3240 100644 --- a/packages/pi-coding-agent/src/core/model-registry.ts +++ b/packages/pi-coding-agent/src/core/model-registry.ts @@ -956,7 +956,12 @@ export class ModelRegistry { try { const apiKey = await this.authStorage.getApiKey(providerName); - if (!apiKey && !this.isProviderRequestReady(providerName)) continue; + if ( + !apiKey && + !this.isProviderRequestReady(providerName) && + adapter.requiresAuthForDiscovery !== false + ) + continue; const models = await adapter.fetchModels(apiKey ?? "", undefined); this.discoveryCache.set(providerName, models); diff --git a/src/resources/extensions/sf/auto-model-selection.ts b/src/resources/extensions/sf/auto-model-selection.ts index 5344da079..ee9135951 100644 --- a/src/resources/extensions/sf/auto-model-selection.ts +++ b/src/resources/extensions/sf/auto-model-selection.ts @@ -148,7 +148,7 @@ const BARE_MODEL_FAMILY_PRIORITY: Array<{ { match: /^glm-/i, providers: ["zai", "opencode", "opencode-go"] }, { match: /^kimi-|^k2p5$/i, - providers: ["kimi-coding", "opencode", "opencode-go"], + providers: ["kimi-coding", "ollama-cloud", "opencode", "opencode-go"], }, { match: /^MiniMax-|^minimax-/i, providers: ["minimax", "minimax-cn"] }, { @@ -168,21 +168,24 @@ function preferredBareModelIds(modelId: string): readonly string[] | undefined { lower === "kimi-for-coding" || lower === "kimi-k2.6" || lower === "kimi-k2.6:cloud" || + lower === "kimi-k2.6-cloud" || lower === "moonshotai/kimi-k2.6" ) { return [ "kimi-for-coding", "kimi-k2.6", "kimi-k2.6:cloud", + "kimi-k2.6-cloud", "moonshotai/kimi-k2.6", ]; } if ( lower === "k2p5" || lower === "kimi-k2.5" || + lower === "kimi-k2.5:cloud" || lower === "moonshotai/kimi-k2.5" ) { - return ["k2p5", "kimi-k2.5", "moonshotai/kimi-k2.5"]; + return ["k2p5", "kimi-k2.5", "kimi-k2.5:cloud", "moonshotai/kimi-k2.5"]; } return undefined; } @@ -214,26 +217,28 @@ function resolveFamilyPreferredBareModel< function bareModelIdAliases(modelId: string): Set { const lower = modelId.toLowerCase(); const aliases = new Set([lower]); - if (lower === "kimi-for-coding" || lower === "kimi-k2.6") { + if ( + lower === "kimi-for-coding" || + lower === "kimi-k2.6" || + lower === "kimi-k2.6:cloud" || + lower === "kimi-k2.6-cloud" || + lower === "moonshotai/kimi-k2.6" + ) { aliases.add("kimi-for-coding"); aliases.add("kimi-k2.6"); aliases.add("kimi-k2.6:cloud"); + aliases.add("kimi-k2.6-cloud"); aliases.add("moonshotai/kimi-k2.6"); } - if (lower === "kimi-k2.6:cloud" || lower === "moonshotai/kimi-k2.6") { - aliases.add("kimi-for-coding"); - aliases.add("kimi-k2.6"); - aliases.add("kimi-k2.6:cloud"); - aliases.add("moonshotai/kimi-k2.6"); - } - if (lower === "kimi-k2.5" || lower === "moonshotai/kimi-k2.5") { - aliases.add("k2p5"); - aliases.add("kimi-k2.5"); - aliases.add("moonshotai/kimi-k2.5"); - } - if (lower === "k2p5") { + if ( + lower === "k2p5" || + lower === "kimi-k2.5" || + lower === "kimi-k2.5:cloud" || + lower === "moonshotai/kimi-k2.5" + ) { aliases.add("k2p5"); aliases.add("kimi-k2.5"); + aliases.add("kimi-k2.5:cloud"); aliases.add("moonshotai/kimi-k2.5"); } return aliases; diff --git a/src/resources/extensions/sf/learning/data/primary-provider-chain.json b/src/resources/extensions/sf/learning/data/primary-provider-chain.json index 6a51bdc69..1d5f26f19 100644 --- a/src/resources/extensions/sf/learning/data/primary-provider-chain.json +++ b/src/resources/extensions/sf/learning/data/primary-provider-chain.json @@ -1,5 +1,4 @@ [ { "provider": "kimi-coding", "model": "kimi-for-coding", "priority": 0 }, - { "provider": "ollama-cloud", "model": "kimi-k2.5:cloud", "priority": 1 }, - { "provider": "opencode-go", "model": "kimi-k2.5", "priority": 2 } + { "provider": "ollama-cloud", "model": "kimi-k2.6", "priority": 1 } ] diff --git a/src/resources/extensions/sf/learning/fallback-chain-writer.mjs b/src/resources/extensions/sf/learning/fallback-chain-writer.mjs index b74bc1639..934581e6d 100644 --- a/src/resources/extensions/sf/learning/fallback-chain-writer.mjs +++ b/src/resources/extensions/sf/learning/fallback-chain-writer.mjs @@ -475,12 +475,12 @@ export function writeFallbackChains(settingsPath, deps) { chainsByName[DEFAULT_CHAIN_NAME] = defaultEntries; } - // Step 3b: hardcoded `main` chain — three provider routes for the user's - // primary model (Kimi K2.5). This is a provider-cover chain: every entry + // Step 3b: hardcoded `main` chain — provider routes for the user's + // primary model (Kimi K2.6). This is a provider-cover chain: every entry // serves the same underlying model via a different provider, so the // retry-handler can rotate past a 429'd provider without flipping to a - // different model family. If all three routes exhaust, tasks running on - // the main model fail (no cross-model fallback). Loaded from + // different model family. If every exact-version route exhausts, tasks + // running on the main model fail (no cross-model fallback). Loaded from // `./data/primary-provider-chain.json` so the list is editable without // touching code. chainsByName[MAIN_CHAIN_NAME] = primaryProviderChainEntries; diff --git a/src/resources/extensions/sf/learning/fallback-chain-writer.test.mjs b/src/resources/extensions/sf/learning/fallback-chain-writer.test.mjs index 64e8f7195..4477941e1 100644 --- a/src/resources/extensions/sf/learning/fallback-chain-writer.test.mjs +++ b/src/resources/extensions/sf/learning/fallback-chain-writer.test.mjs @@ -318,7 +318,7 @@ test("writeFallbackChains warns via log when project-level .sf/agent/settings.js } }); -test("writeFallbackChains always emits the hardcoded main chain with canonical kimi-for-coding primary route", () => { +test("writeFallbackChains always emits the hardcoded main chain with exact Kimi K2.6 routes", () => { const { dir, settingsPath } = makeTempSettingsDir(); try { // Deps deliberately minimal — no overrides, no enabledModels — so @@ -331,19 +331,20 @@ test("writeFallbackChains always emits the hardcoded main chain with canonical k const mainChain = written.fallback.chains.main; assert.ok(Array.isArray(mainChain), "main chain present"); - assert.equal(mainChain.length, 3, "main chain has exactly 3 entries"); + assert.equal(mainChain.length, 2, "main chain has exactly 2 entries"); assert.equal(mainChain[0].provider, "kimi-coding"); assert.equal(mainChain[0].model, "kimi-for-coding"); assert.equal(mainChain[0].priority, 0); assert.equal(mainChain[1].provider, "ollama-cloud"); - assert.equal(mainChain[1].model, "kimi-k2.5:cloud"); + assert.equal(mainChain[1].model, "kimi-k2.6"); assert.equal(mainChain[1].priority, 1); - assert.equal(mainChain[2].provider, "opencode-go"); - assert.equal(mainChain[2].model, "kimi-k2.5"); - assert.equal(mainChain[2].priority, 2); + assert.ok( + mainChain.every((entry) => !entry.model.includes("k2.5")), + "main chain must not fall back from K2.6 to K2.5", + ); } finally { rmSync(dir, { recursive: true, force: true }); } @@ -379,7 +380,7 @@ test("hardcoded main chain coexists with blender-computed per-unit-type chains", // Hardcoded main chain present assert.ok(Array.isArray(chains.main), "main chain present"); - assert.equal(chains.main.length, 3); + assert.equal(chains.main.length, 2); // Blender-computed per-unit-type chain also present assert.ok(Array.isArray(chains.planning), "planning chain present"); diff --git a/src/resources/extensions/sf/model-identity.ts b/src/resources/extensions/sf/model-identity.ts index 43553c538..20529ec09 100644 --- a/src/resources/extensions/sf/model-identity.ts +++ b/src/resources/extensions/sf/model-identity.ts @@ -22,9 +22,21 @@ export function normalizedModelName(model: { }): string { const provider = model.provider?.toLowerCase(); const id = model.id.toLowerCase(); - if (provider === "kimi-coding" && id === "kimi-for-coding") + if ( + (provider === "kimi-coding" && id === "kimi-for-coding") || + id === "kimi-k2.6" || + id === "kimi-k2.6:cloud" || + id === "kimi-k2.6-cloud" || + id === "moonshotai/kimi-k2.6" + ) return "Kimi K2.6"; - if (provider === "kimi-coding" && id === "k2p5") return "Kimi K2.5"; + if ( + (provider === "kimi-coding" && id === "k2p5") || + id === "kimi-k2.5" || + id === "kimi-k2.5:cloud" || + id === "moonshotai/kimi-k2.5" + ) + return "Kimi K2.5"; if (model.name?.trim()) return model.name.trim(); return model.id; } diff --git a/src/resources/extensions/sf/tests/auto-model-selection.test.ts b/src/resources/extensions/sf/tests/auto-model-selection.test.ts index 5f4594918..62793602a 100644 --- a/src/resources/extensions/sf/tests/auto-model-selection.test.ts +++ b/src/resources/extensions/sf/tests/auto-model-selection.test.ts @@ -319,7 +319,7 @@ test("resolveModelId: bare GLM IDs fall back when zai lacks that exact model", ( test("resolveModelId: bare Kimi K2.6 IDs prefer canonical Kimi Code over aggregators", () => { const availableModels = [ - { id: "kimi-k2.6:cloud", provider: "ollama" }, + { id: "kimi-k2.6", provider: "ollama-cloud" }, { id: "kimi-for-coding", provider: "kimi-coding" }, { id: "kimi-k2.6", provider: "opencode-go" }, ]; @@ -330,6 +330,18 @@ test("resolveModelId: bare Kimi K2.6 IDs prefer canonical Kimi Code over aggrega assert.equal(result.id, "kimi-for-coding"); }); +test("resolveModelId: bare Kimi K2.6 can resolve Ollama Cloud exact version when present", () => { + const availableModels = [ + { id: "kimi-k2.5", provider: "ollama-cloud" }, + { id: "kimi-k2.6", provider: "ollama-cloud" }, + ]; + + const result = resolveModelId("kimi-k2.6", availableModels, "ollama-cloud"); + assert.ok(result, "should resolve exact Kimi K2.6"); + assert.equal(result.provider, "ollama-cloud"); + assert.equal(result.id, "kimi-k2.6"); +}); + test("resolveModelId: bare Kimi K2.5 IDs do not alias to K2.6", () => { const availableModels = [ { id: "kimi-for-coding", provider: "kimi-coding" }, diff --git a/src/resources/extensions/sf/tests/model-identity.test.ts b/src/resources/extensions/sf/tests/model-identity.test.ts index da5d12b20..a6fec39f7 100644 --- a/src/resources/extensions/sf/tests/model-identity.test.ts +++ b/src/resources/extensions/sf/tests/model-identity.test.ts @@ -16,6 +16,15 @@ test("model identity: Kimi Code wire id displays as Kimi K2.6", () => { ); }); +test("model identity: Ollama Cloud live K2.6 id displays as Kimi K2.6", () => { + const model = { provider: "ollama-cloud", id: "kimi-k2.6" }; + assert.equal(normalizedModelName(model), "Kimi K2.6"); + assert.equal( + formatModelIdentity(model), + "Kimi K2.6 (ollama-cloud/kimi-k2.6)", + ); +}); + test("model identity: K2.5 remains distinct from K2.6", () => { const model = { provider: "kimi-coding", id: "k2p5", name: "Kimi K2.5" }; assert.equal(normalizedModelName(model), "Kimi K2.5");