fix(sf): use live ollama k2.6 routes

This commit is contained in:
Mikael Hugo 2026-04-29 21:38:51 +02:00
parent f78c3fb2b8
commit 9c4bf9b3e6
11 changed files with 226 additions and 39 deletions

View file

@ -22,6 +22,13 @@ describe("getDiscoveryAdapter", () => {
assert.equal(adapter.supportsDiscovery, true);
});
it("returns an adapter for ollama-cloud", () => {
  // Pull out the three fields under test in one go; the adapter itself is not
  // needed afterwards.
  const { provider, supportsDiscovery, requiresAuthForDiscovery } =
    getDiscoveryAdapter("ollama-cloud");

  assert.equal(provider, "ollama-cloud");
  assert.equal(supportsDiscovery, true);
  // The cloud model listing is expected to be reachable without a key.
  assert.equal(requiresAuthForDiscovery, false);
});
it("returns an adapter for openrouter", () => {
const adapter = getDiscoveryAdapter("openrouter");
assert.equal(adapter.provider, "openrouter");
@ -66,6 +73,7 @@ describe("getDiscoverableProviders", () => {
const providers = getDiscoverableProviders();
assert.ok(providers.includes("openai"));
assert.ok(providers.includes("ollama"));
assert.ok(providers.includes("ollama-cloud"));
assert.ok(providers.includes("openrouter"));
assert.ok(providers.includes("google"));
assert.ok(!providers.includes("anthropic"));
@ -92,6 +100,10 @@ describe("getDefaultTTL", () => {
assert.equal(getDefaultTTL("openai"), 60 * 60 * 1000);
});
it("returns 1 hour for ollama-cloud", () => {
  // One hour expressed in milliseconds.
  const oneHourMs = 60 * 60 * 1000;
  const ttl = getDefaultTTL("ollama-cloud");
  assert.equal(ttl, oneHourMs);
});
it("returns 1 hour for google", () => {
  // One hour expressed in milliseconds.
  const oneHourMs = 60 * 60 * 1000;
  const ttl = getDefaultTTL("google");
  assert.equal(ttl, oneHourMs);
});
@ -110,6 +122,7 @@ describe("getDefaultTTL", () => {
describe("DISCOVERY_TTLS", () => {
it("has expected keys", () => {
assert.ok("ollama" in DISCOVERY_TTLS);
assert.ok("ollama-cloud" in DISCOVERY_TTLS);
assert.ok("openai" in DISCOVERY_TTLS);
assert.ok("google" in DISCOVERY_TTLS);
assert.ok("openrouter" in DISCOVERY_TTLS);
@ -123,3 +136,43 @@ describe("DISCOVERY_TTLS", () => {
}
});
});
// ─── Ollama Cloud Adapter ───────────────────────────────────────────────────
describe("ollama-cloud discovery", () => {
  it("uses the live OpenAI-compatible /v1/models endpoint", async () => {
    // Canned model listing served by the stubbed fetch below.
    const listing = {
      data: [
        { id: "kimi-k2.5", object: "model", owned_by: "ollama" },
        { id: "kimi-k2.6", object: "model", owned_by: "ollama" },
      ],
    };

    const realFetch = globalThis.fetch;
    const recorded: Array<{ url: string; headers?: HeadersInit }> = [];

    // Stub that records each request (URL + headers) and always answers 200.
    const stubFetch = async (
      input: string | URL | Request,
      init?: RequestInit,
    ) => {
      recorded.push({ url: String(input), headers: init?.headers });
      return new Response(JSON.stringify(listing), { status: 200 });
    };
    globalThis.fetch = stubFetch as typeof fetch;

    try {
      const models = await getDiscoveryAdapter("ollama-cloud").fetchModels(
        "test-key",
      );

      const firstCall = recorded[0];
      assert.equal(firstCall?.url, "https://ollama.com/v1/models");
      assert.deepEqual(firstCall?.headers, {
        Authorization: "Bearer test-key",
      });

      const discoveredIds = models.map((m) => m.id);
      assert.deepEqual(discoveredIds, ["kimi-k2.5", "kimi-k2.6"]);
    } finally {
      // Always put the real fetch back so later tests are unaffected.
      globalThis.fetch = realFetch;
    }
  });
});

View file

@ -23,12 +23,14 @@ export interface DiscoveryResult {
/**
 * Contract implemented by each per-provider model discovery adapter.
 */
export interface ProviderDiscoveryAdapter {
/** Provider identifier this adapter serves (for example "ollama-cloud"). */
provider: string;
/**
 * Whether the adapter can list models dynamically. Static adapters report
 * `false` and return an empty list from `fetchModels`.
 */
supportsDiscovery: boolean;
/**
 * Optional. When explicitly `false`, the model listing may be requested even
 * if no API key is available for the provider. Leaving it unset is not the
 * same as `false`: callers check for `!== false` before skipping a provider.
 */
requiresAuthForDiscovery?: boolean;
/**
 * Fetch the models currently offered by the provider.
 *
 * @param apiKey - API key for the provider; callers may pass an empty string
 *   when none is stored.
 * @param baseUrl - Optional base URL for the provider's API; how a missing
 *   value is defaulted is up to the adapter.
 * @returns The discovered models.
 */
fetchModels(apiKey: string, baseUrl?: string): Promise<DiscoveredModel[]>;
}
/** Per-provider TTLs in milliseconds */
export const DISCOVERY_TTLS: Record<string, number> = {
ollama: 5 * 60 * 1000, // 5 minutes (local, models change often)
"ollama-cloud": 60 * 60 * 1000, // 1 hour
openai: 60 * 60 * 1000, // 1 hour
google: 60 * 60 * 1000, // 1 hour
openrouter: 60 * 60 * 1000, // 1 hour
@ -104,6 +106,35 @@ class OllamaDiscoveryAdapter implements ProviderDiscoveryAdapter {
}
}
// ─── Ollama Cloud Adapter ────────────────────────────────────────────────────
/**
 * Discovery adapter for the `ollama-cloud` provider.
 *
 * Lists models through the `/v1/models` route below the configured base URL
 * (default `https://ollama.com`). The response is expected to be an object
 * with a `data` array whose entries carry at least a string `id`.
 */
class OllamaCloudDiscoveryAdapter implements ProviderDiscoveryAdapter {
  provider = "ollama-cloud";
  supportsDiscovery = true;
  // The listing is requested even when no API key is supplied; in that case
  // no Authorization header is sent (see fetchModels).
  requiresAuthForDiscovery = false;

  /**
   * Fetch the model listing from Ollama Cloud.
   *
   * @param apiKey - Bearer token. When empty, the request is sent without an
   *   Authorization header.
   * @param baseUrl - Optional API root. Trailing slashes are ignored and a
   *   root that already ends in `/v1` is not suffixed again. A missing or
   *   blank value falls back to `https://ollama.com`.
   * @returns One model per well-formed entry; `name` falls back to `id`.
   * @throws Error when the HTTP status is not OK or the body does not have
   *   the expected shape.
   */
  async fetchModels(apiKey: string, baseUrl?: string): Promise<DiscoveredModel[]> {
    // A blank base URL would otherwise produce the relative URL "/v1/models".
    const configuredRoot = baseUrl?.trim();
    const root = (configuredRoot ? configuredRoot : "https://ollama.com").replace(/\/+$/, "");
    const url = root.endsWith("/v1") ? `${root}/models` : `${root}/v1/models`;
    const headers = apiKey ? { Authorization: `Bearer ${apiKey}` } : undefined;

    const response = await fetchWithTimeout(url, { headers });
    if (!response.ok) {
      throw new Error(`Ollama Cloud models API returned ${response.status}: ${response.statusText}`);
    }

    // Treat the body as untrusted: narrow it step by step instead of casting.
    const payload: unknown = await response.json();
    if (typeof payload !== "object" || payload === null) {
      throw new Error("Ollama Cloud models API returned a non-object response body");
    }
    const entries = (payload as { data?: unknown }).data;
    if (entries === undefined || entries === null) {
      // A missing `data` field is treated as an empty listing.
      return [];
    }
    if (!Array.isArray(entries)) {
      throw new Error("Ollama Cloud models API returned a non-array `data` field");
    }

    const models: DiscoveredModel[] = [];
    for (const entry of entries as unknown[]) {
      if (typeof entry !== "object" || entry === null) continue;
      const { id, name } = entry as { id?: unknown; name?: unknown };
      // Entries without a usable id cannot be addressed later, so skip them.
      if (typeof id !== "string" || id.length === 0) continue;
      models.push({
        id,
        name: typeof name === "string" ? name : id,
        input: ["text" as const],
      });
    }
    return models;
  }
}
// ─── OpenRouter Adapter ──────────────────────────────────────────────────────
class OpenRouterDiscoveryAdapter implements ProviderDiscoveryAdapter {
@ -209,6 +240,7 @@ class StaticDiscoveryAdapter implements ProviderDiscoveryAdapter {
const adapters: Record<string, ProviderDiscoveryAdapter> = {
openai: new OpenAIDiscoveryAdapter(),
ollama: new OllamaDiscoveryAdapter(),
"ollama-cloud": new OllamaCloudDiscoveryAdapter(),
openrouter: new OpenRouterDiscoveryAdapter(),
google: new GoogleDiscoveryAdapter(),
anthropic: new StaticDiscoveryAdapter("anthropic"),

View file

@ -1,16 +1,24 @@
import assert from "node:assert/strict";
import { mkdirSync, rmSync, writeFileSync } from "node:fs";
import { mkdirSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, it } from "node:test";
import { AuthStorage } from "./auth-storage.js";
import { ModelDiscoveryCache } from "./discovery-cache.js";
import { getDefaultTTL, getDiscoverableProviders, getDiscoveryAdapter } from "./model-discovery.js";
import {
getDefaultTTL,
getDiscoverableProviders,
getDiscoveryAdapter,
} from "./model-discovery.js";
import { ModelRegistry } from "./model-registry.js";
let testDir: string;
beforeEach(() => {
testDir = join(tmpdir(), `model-registry-discovery-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
testDir = join(
tmpdir(),
`model-registry-discovery-test-${Date.now()}-${Math.random().toString(36).slice(2)}`,
);
mkdirSync(testDir, { recursive: true });
});
@ -53,15 +61,29 @@ describe("Discovery adapter resolution", () => {
const providers = getDiscoverableProviders();
for (const provider of providers) {
const adapter = getDiscoveryAdapter(provider);
assert.equal(adapter.supportsDiscovery, true, `${provider} should support discovery`);
assert.equal(
adapter.supportsDiscovery,
true,
`${provider} should support discovery`,
);
}
});
it("static adapters return empty model lists", async () => {
const staticProviders = ["anthropic", "bedrock", "azure-openai", "groq", "cerebras"];
const staticProviders = [
"anthropic",
"bedrock",
"azure-openai",
"groq",
"cerebras",
];
for (const provider of staticProviders) {
const adapter = getDiscoveryAdapter(provider);
assert.equal(adapter.supportsDiscovery, false, `${provider} should not support discovery`);
assert.equal(
adapter.supportsDiscovery,
false,
`${provider} should not support discovery`,
);
const models = await adapter.fetchModels("dummy-key");
assert.deepEqual(models, [], `${provider} should return empty models`);
}
@ -86,6 +108,40 @@ describe("AuthStorage — hasAuth for discovery providers", () => {
});
});
// ─── public model-list discovery ─────────────────────────────────────────────
describe("ModelRegistry — public discovery providers", () => {
  it("discovers ollama-cloud models from live model listing without stored auth", async () => {
    const realFetch = globalThis.fetch;

    // Serialized listing returned by every stubbed fetch call.
    const listingJson = JSON.stringify({
      data: [{ id: "kimi-k2.5" }, { id: "kimi-k2.6" }],
    });
    const stubFetch = async () => new Response(listingJson, { status: 200 });
    globalThis.fetch = stubFetch as typeof fetch;

    try {
      // Empty in-memory auth: no key is stored for any provider.
      const auth = AuthStorage.inMemory({});
      const registry = new ModelRegistry(auth, undefined);

      const results = await registry.discoverModels(["ollama-cloud"]);
      const first = results[0];

      assert.equal(first?.provider, "ollama-cloud");
      assert.deepEqual(
        first?.models.map((m) => m.id),
        ["kimi-k2.5", "kimi-k2.6"],
      );

      const retained = registry
        .getAllWithDiscovered()
        .some((m) => m.provider === "ollama-cloud" && m.id === "kimi-k2.6");
      assert.ok(
        retained,
        "discovered Kimi K2.6 is retained as ollama-cloud/kimi-k2.6",
      );
    } finally {
      // Restore the real fetch regardless of the outcome.
      globalThis.fetch = realFetch;
    }
  });
});
// ─── cache persistence across instances ──────────────────────────────────────
describe("ModelDiscoveryCache — persistence", () => {
@ -123,7 +179,10 @@ describe("Discovery TTL configuration", () => {
it("ollama has shortest TTL (local models change often)", () => {
const ollamaTTL = getDefaultTTL("ollama");
const openaiTTL = getDefaultTTL("openai");
assert.ok(ollamaTTL < openaiTTL, "ollama TTL should be shorter than openai");
assert.ok(
ollamaTTL < openaiTTL,
"ollama TTL should be shorter than openai",
);
});
it("unknown providers get default TTL", () => {

View file

@ -956,7 +956,12 @@ export class ModelRegistry {
try {
const apiKey = await this.authStorage.getApiKey(providerName);
if (!apiKey && !this.isProviderRequestReady(providerName)) continue;
if (
!apiKey &&
!this.isProviderRequestReady(providerName) &&
adapter.requiresAuthForDiscovery !== false
)
continue;
const models = await adapter.fetchModels(apiKey ?? "", undefined);
this.discoveryCache.set(providerName, models);

View file

@ -148,7 +148,7 @@ const BARE_MODEL_FAMILY_PRIORITY: Array<{
{ match: /^glm-/i, providers: ["zai", "opencode", "opencode-go"] },
{
match: /^kimi-|^k2p5$/i,
providers: ["kimi-coding", "opencode", "opencode-go"],
providers: ["kimi-coding", "ollama-cloud", "opencode", "opencode-go"],
},
{ match: /^MiniMax-|^minimax-/i, providers: ["minimax", "minimax-cn"] },
{
@ -168,21 +168,24 @@ function preferredBareModelIds(modelId: string): readonly string[] | undefined {
lower === "kimi-for-coding" ||
lower === "kimi-k2.6" ||
lower === "kimi-k2.6:cloud" ||
lower === "kimi-k2.6-cloud" ||
lower === "moonshotai/kimi-k2.6"
) {
return [
"kimi-for-coding",
"kimi-k2.6",
"kimi-k2.6:cloud",
"kimi-k2.6-cloud",
"moonshotai/kimi-k2.6",
];
}
if (
lower === "k2p5" ||
lower === "kimi-k2.5" ||
lower === "kimi-k2.5:cloud" ||
lower === "moonshotai/kimi-k2.5"
) {
return ["k2p5", "kimi-k2.5", "moonshotai/kimi-k2.5"];
return ["k2p5", "kimi-k2.5", "kimi-k2.5:cloud", "moonshotai/kimi-k2.5"];
}
return undefined;
}
@ -214,26 +217,28 @@ function resolveFamilyPreferredBareModel<
function bareModelIdAliases(modelId: string): Set<string> {
const lower = modelId.toLowerCase();
const aliases = new Set([lower]);
if (lower === "kimi-for-coding" || lower === "kimi-k2.6") {
if (
lower === "kimi-for-coding" ||
lower === "kimi-k2.6" ||
lower === "kimi-k2.6:cloud" ||
lower === "kimi-k2.6-cloud" ||
lower === "moonshotai/kimi-k2.6"
) {
aliases.add("kimi-for-coding");
aliases.add("kimi-k2.6");
aliases.add("kimi-k2.6:cloud");
aliases.add("kimi-k2.6-cloud");
aliases.add("moonshotai/kimi-k2.6");
}
if (lower === "kimi-k2.6:cloud" || lower === "moonshotai/kimi-k2.6") {
aliases.add("kimi-for-coding");
aliases.add("kimi-k2.6");
aliases.add("kimi-k2.6:cloud");
aliases.add("moonshotai/kimi-k2.6");
}
if (lower === "kimi-k2.5" || lower === "moonshotai/kimi-k2.5") {
aliases.add("k2p5");
aliases.add("kimi-k2.5");
aliases.add("moonshotai/kimi-k2.5");
}
if (lower === "k2p5") {
if (
lower === "k2p5" ||
lower === "kimi-k2.5" ||
lower === "kimi-k2.5:cloud" ||
lower === "moonshotai/kimi-k2.5"
) {
aliases.add("k2p5");
aliases.add("kimi-k2.5");
aliases.add("kimi-k2.5:cloud");
aliases.add("moonshotai/kimi-k2.5");
}
return aliases;

View file

@ -1,5 +1,4 @@
[
{ "provider": "kimi-coding", "model": "kimi-for-coding", "priority": 0 },
{ "provider": "ollama-cloud", "model": "kimi-k2.5:cloud", "priority": 1 },
{ "provider": "opencode-go", "model": "kimi-k2.5", "priority": 2 }
{ "provider": "ollama-cloud", "model": "kimi-k2.6", "priority": 1 }
]

View file

@ -475,12 +475,12 @@ export function writeFallbackChains(settingsPath, deps) {
chainsByName[DEFAULT_CHAIN_NAME] = defaultEntries;
}
// Step 3b: hardcoded `main` chain — three provider routes for the user's
// primary model (Kimi K2.5). This is a provider-cover chain: every entry
// Step 3b: hardcoded `main` chain — provider routes for the user's
// primary model (Kimi K2.6). This is a provider-cover chain: every entry
// serves the same underlying model via a different provider, so the
// retry-handler can rotate past a 429'd provider without flipping to a
// different model family. If all three routes exhaust, tasks running on
// the main model fail (no cross-model fallback). Loaded from
// different model family. If every exact-version route exhausts, tasks
// running on the main model fail (no cross-model fallback). Loaded from
// `./data/primary-provider-chain.json` so the list is editable without
// touching code.
chainsByName[MAIN_CHAIN_NAME] = primaryProviderChainEntries;

View file

@ -318,7 +318,7 @@ test("writeFallbackChains warns via log when project-level .sf/agent/settings.js
}
});
test("writeFallbackChains always emits the hardcoded main chain with canonical kimi-for-coding primary route", () => {
test("writeFallbackChains always emits the hardcoded main chain with exact Kimi K2.6 routes", () => {
const { dir, settingsPath } = makeTempSettingsDir();
try {
// Deps deliberately minimal — no overrides, no enabledModels — so
@ -331,19 +331,20 @@ test("writeFallbackChains always emits the hardcoded main chain with canonical k
const mainChain = written.fallback.chains.main;
assert.ok(Array.isArray(mainChain), "main chain present");
assert.equal(mainChain.length, 3, "main chain has exactly 3 entries");
assert.equal(mainChain.length, 2, "main chain has exactly 2 entries");
assert.equal(mainChain[0].provider, "kimi-coding");
assert.equal(mainChain[0].model, "kimi-for-coding");
assert.equal(mainChain[0].priority, 0);
assert.equal(mainChain[1].provider, "ollama-cloud");
assert.equal(mainChain[1].model, "kimi-k2.5:cloud");
assert.equal(mainChain[1].model, "kimi-k2.6");
assert.equal(mainChain[1].priority, 1);
assert.equal(mainChain[2].provider, "opencode-go");
assert.equal(mainChain[2].model, "kimi-k2.5");
assert.equal(mainChain[2].priority, 2);
assert.ok(
mainChain.every((entry) => !entry.model.includes("k2.5")),
"main chain must not fall back from K2.6 to K2.5",
);
} finally {
rmSync(dir, { recursive: true, force: true });
}
@ -379,7 +380,7 @@ test("hardcoded main chain coexists with blender-computed per-unit-type chains",
// Hardcoded main chain present
assert.ok(Array.isArray(chains.main), "main chain present");
assert.equal(chains.main.length, 3);
assert.equal(chains.main.length, 2);
// Blender-computed per-unit-type chain also present
assert.ok(Array.isArray(chains.planning), "planning chain present");

View file

@ -22,9 +22,21 @@ export function normalizedModelName(model: {
}): string {
const provider = model.provider?.toLowerCase();
const id = model.id.toLowerCase();
if (provider === "kimi-coding" && id === "kimi-for-coding")
if (
(provider === "kimi-coding" && id === "kimi-for-coding") ||
id === "kimi-k2.6" ||
id === "kimi-k2.6:cloud" ||
id === "kimi-k2.6-cloud" ||
id === "moonshotai/kimi-k2.6"
)
return "Kimi K2.6";
if (provider === "kimi-coding" && id === "k2p5") return "Kimi K2.5";
if (
(provider === "kimi-coding" && id === "k2p5") ||
id === "kimi-k2.5" ||
id === "kimi-k2.5:cloud" ||
id === "moonshotai/kimi-k2.5"
)
return "Kimi K2.5";
if (model.name?.trim()) return model.name.trim();
return model.id;
}

View file

@ -319,7 +319,7 @@ test("resolveModelId: bare GLM IDs fall back when zai lacks that exact model", (
test("resolveModelId: bare Kimi K2.6 IDs prefer canonical Kimi Code over aggregators", () => {
const availableModels = [
{ id: "kimi-k2.6:cloud", provider: "ollama" },
{ id: "kimi-k2.6", provider: "ollama-cloud" },
{ id: "kimi-for-coding", provider: "kimi-coding" },
{ id: "kimi-k2.6", provider: "opencode-go" },
];
@ -330,6 +330,18 @@ test("resolveModelId: bare Kimi K2.6 IDs prefer canonical Kimi Code over aggrega
assert.equal(result.id, "kimi-for-coding");
});
test("resolveModelId: bare Kimi K2.6 can resolve Ollama Cloud exact version when present", () => {
  // Both Kimi versions are offered by the same provider; only K2.6 may win.
  const provider = "ollama-cloud";
  const availableModels = ["kimi-k2.5", "kimi-k2.6"].map((id) => ({
    id,
    provider,
  }));

  const resolved = resolveModelId("kimi-k2.6", availableModels, provider);

  assert.ok(resolved, "should resolve exact Kimi K2.6");
  assert.equal(resolved.provider, "ollama-cloud");
  assert.equal(resolved.id, "kimi-k2.6");
});
test("resolveModelId: bare Kimi K2.5 IDs do not alias to K2.6", () => {
const availableModels = [
{ id: "kimi-for-coding", provider: "kimi-coding" },

View file

@ -16,6 +16,15 @@ test("model identity: Kimi Code wire id displays as Kimi K2.6", () => {
);
});
test("model identity: Ollama Cloud live K2.6 id displays as Kimi K2.6", () => {
  // No `name` field on purpose: the display name must come from the id alone.
  const liveRoute = { provider: "ollama-cloud", id: "kimi-k2.6" };

  const displayName = normalizedModelName(liveRoute);
  assert.equal(displayName, "Kimi K2.6");

  const identity = formatModelIdentity(liveRoute);
  assert.equal(identity, "Kimi K2.6 (ollama-cloud/kimi-k2.6)");
});
test("model identity: K2.5 remains distinct from K2.6", () => {
const model = { provider: "kimi-coding", id: "k2p5", name: "Kimi K2.5" };
assert.equal(normalizedModelName(model), "Kimi K2.5");