diff --git a/packages/pi-ai/scripts/generate-models.ts b/packages/pi-ai/scripts/generate-models.ts index 2bc9a7232..039e45f05 100644 --- a/packages/pi-ai/scripts/generate-models.ts +++ b/packages/pi-ai/scripts/generate-models.ts @@ -1269,20 +1269,24 @@ async function generateModels() { const KIMI_CODING_BASE_URL = "https://api.kimi.com/coding"; const kimiCodingModels: Model<"anthropic-messages">[] = [ { - id: "kimi-k2-thinking", - name: "Kimi K2 Thinking", + id: "kimi-for-coding", + name: "Kimi K2.6", api: "anthropic-messages", provider: "kimi-coding", baseUrl: KIMI_CODING_BASE_URL, reasoning: true, - input: ["text"], - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + input: ["text", "image"], + capabilities: { thinkingNoBudget: true }, + // Kimi Code is subscription-backed, but SF ranking uses the normal + // pay-as-you-go Kimi K2.6 market price to compare models fairly. + // Source: OpenRouter moonshotai/kimi-k2.6, 2026-04-30. + cost: { input: 0.7448, output: 4.655, cacheRead: 0, cacheWrite: 0 }, contextWindow: 262144, maxTokens: 32768, }, { - id: "k2p5", - name: "Kimi K2.5", + id: "kimi-k2-thinking", + name: "Kimi K2 Thinking", api: "anthropic-messages", provider: "kimi-coding", baseUrl: KIMI_CODING_BASE_URL, diff --git a/packages/pi-ai/src/models.generated.ts b/packages/pi-ai/src/models.generated.ts index ae5208156..ac53a05cd 100644 --- a/packages/pi-ai/src/models.generated.ts +++ b/packages/pi-ai/src/models.generated.ts @@ -4626,26 +4626,8 @@ export const MODELS = { input: ["text", "image"], capabilities: { thinkingNoBudget: true }, cost: { - input: 0.6, - output: 2.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 32768, - } satisfies Model<"anthropic-messages">, - "k2p5": { - id: "k2p5", - name: "Kimi K2.5", - api: "anthropic-messages", - provider: "kimi-coding", - baseUrl: "https://api.kimi.com/coding", - reasoning: true, - input: ["text", "image"], - capabilities: { thinkingNoBudget: true }, - cost: { - input: 0, - output: 0, + input: 0.7448, + output: 4.655, cacheRead: 0, cacheWrite: 0, }, diff --git a/packages/pi-ai/src/models.test.ts b/packages/pi-ai/src/models.test.ts index 2969ae59a..bd38c3d66 100644 --- a/packages/pi-ai/src/models.test.ts +++ b/packages/pi-ai/src/models.test.ts @@ -122,6 +122,7 @@ describe("model registry — xiaomi provider", () => { const models = getModels("xiaomi" as any); const ids = models.map((m) => m.id).sort(); assert.deepEqual(ids, [ + "mimo-v2-flash", "mimo-v2-omni", "mimo-v2-pro", "mimo-v2.5", @@ -165,6 +166,14 @@ describe("model registry — kimi-coding provider", () => { assert.equal(model.baseUrl, "https://api.kimi.com/coding"); assert.equal(model.contextWindow, 262144); }); + + it("kimi-coding uses market comparison pricing for Kimi K2.6", () => { + const model = getModel("kimi-coding" as any, "kimi-for-coding" as any); + assert.ok(model, "Expected getModel to return kimi-coding/kimi-for-coding"); + assert.equal(model.name, "Kimi K2.6"); + assert.equal(model.cost.input, 0.7448); + assert.equal(model.cost.output, 4.655); + }); }); // ═══════════════════════════════════════════════════════════════════════════ diff --git a/src/resources/extensions/sf/auto-model-selection.ts b/src/resources/extensions/sf/auto-model-selection.ts index 94d430cbf..1f331d552 100644 --- a/src/resources/extensions/sf/auto-model-selection.ts +++ b/src/resources/extensions/sf/auto-model-selection.ts @@ -148,7 +148,7 @@ const BARE_MODEL_FAMILY_PRIORITY: Array<{ }> = [ { match: /^glm-/i, providers: ["zai", "opencode", "opencode-go"] }, { - match: /^kimi-|^k2p5$/i, + match: /^kimi-/i, providers: ["kimi-coding", "ollama-cloud", "opencode", "opencode-go"], }, { match: /^MiniMax-|^minimax-/i, providers: ["minimax", "minimax-cn"] }, @@ -176,12 +176,11 @@ function preferredBareModelIds(modelId: string): readonly string[] | undefined { ]; } if ( - lower === "k2p5" || lower === "kimi-k2.5" || lower === "kimi-k2.5:cloud" || lower === "moonshotai/kimi-k2.5" ) { - return ["kimi-k2.5", "moonshotai/kimi-k2.5", "kimi-k2.5:cloud", "k2p5"]; + return ["kimi-k2.5", "moonshotai/kimi-k2.5", "kimi-k2.5:cloud"]; } return undefined; } @@ -227,12 +226,10 @@ function bareModelIdAliases(modelId: string): Set { aliases.add("moonshotai/kimi-k2.6"); } if ( - lower === "k2p5" || lower === "kimi-k2.5" || lower === "kimi-k2.5:cloud" || lower === "moonshotai/kimi-k2.5" ) { - aliases.add("k2p5"); aliases.add("kimi-k2.5"); aliases.add("kimi-k2.5:cloud"); aliases.add("moonshotai/kimi-k2.5"); diff --git a/src/resources/extensions/sf/benchmark-selector.ts b/src/resources/extensions/sf/benchmark-selector.ts index ed80ebbc3..1ae6ffc34 100644 --- a/src/resources/extensions/sf/benchmark-selector.ts +++ b/src/resources/extensions/sf/benchmark-selector.ts @@ -365,8 +365,8 @@ const BENCHMARK_KEY_ALIASES: Record = { "moonshotai/kimi-k2.6": "kimi-k2.6", "kimi-k2.6:cloud": "kimi-k2.6", "kimi-k2.6-cloud": "kimi-k2.6", - // Kimi Code / aggregator wire IDs. The benchmark identity is Kimi K2.5. - k2p5: "kimi-k2.5", + // Kimi aggregator wire IDs. Kimi Code's `kimi-for-coding` is K2.6 above. + "kimi-k2.5": "kimi-k2.5", "moonshotai/kimi-k2.5": "kimi-k2.5", "moonshotai.kimi-k2.5": "kimi-k2.5", "kimi-k2.5:cloud": "kimi-k2.5", diff --git a/src/resources/extensions/sf/learning/bayesian-blender.mjs b/src/resources/extensions/sf/learning/bayesian-blender.mjs index 70042bb6e..a15373fe5 100644 --- a/src/resources/extensions/sf/learning/bayesian-blender.mjs +++ b/src/resources/extensions/sf/learning/bayesian-blender.mjs @@ -33,6 +33,17 @@ const NEUTRAL_OBSERVED_SCORE = 50; const SCORE_SCALE = 100; const UNTRIED_MODEL_BONUS = 1000; const DEFAULT_MAX_RETRIES = 5; +const BENCHMARK_ID_ALIASES = Object.freeze({ + "kimi-for-coding": "kimi-k2.6", + "moonshotai/kimi-k2.6": "kimi-k2.6", + "kimi-k2.6:cloud": "kimi-k2.6", + "kimi-k2.6-cloud": "kimi-k2.6", + "kimi-k2.5": "kimi-k2.5", + "moonshotai/kimi-k2.5": "kimi-k2.5", + "moonshotai.kimi-k2.5": "kimi-k2.5", + "kimi-k2.5:cloud": "kimi-k2.5", + "kimi-k2.5-cloud": "kimi-k2.5", +}); /** * Core blend: α · prior + (1 - α) · observed @@ -151,7 +162,7 @@ export function computeObservedScore( /** * Full ranking of eligible models for a unit type. * - * @param {string[]} eligibleModels - e.g. ["kimi-coding/k2p5", "minimax/MiniMax-M2.7"] + * @param {string[]} eligibleModels - e.g. ["kimi-coding/kimi-for-coding", "minimax/MiniMax-M2.7"] * @param {string} unitType - e.g. "execute-task" (currently informational) * @param {Object} priorsByModel - {modelId: priorScore (0-100)} — from loadCapabilityOverrides * @param {Object} observedByModel - {modelId: AggregatedStats} — from outcome-aggregator @@ -218,8 +229,8 @@ export function blendedRanking( /** * Helper: map a model id to its bare name for benchmark lookup. - * "kimi-coding/k2p5" → "k2p5" - * "k2p5" → "k2p5" + * "kimi-coding/kimi-for-coding" → "kimi-k2.6" + * "kimi-k2.5" → "kimi-k2.5" * "ollama-cloud/qwen3-coder:480b" → "qwen3-coder:480b" * * @param {string} modelId @@ -228,7 +239,8 @@ export function blendedRanking( export function stripProviderPrefix(modelId) { const slashIndex = modelId.indexOf("/"); if (slashIndex === -1) { - return modelId; + return BENCHMARK_ID_ALIASES[modelId.toLowerCase()] ?? modelId; } - return modelId.slice(slashIndex + 1); + const stripped = modelId.slice(slashIndex + 1); + return BENCHMARK_ID_ALIASES[stripped.toLowerCase()] ?? stripped; } diff --git a/src/resources/extensions/sf/learning/bayesian-blender.test.mjs b/src/resources/extensions/sf/learning/bayesian-blender.test.mjs index a37c915e6..091b367e8 100644 --- a/src/resources/extensions/sf/learning/bayesian-blender.test.mjs +++ b/src/resources/extensions/sf/learning/bayesian-blender.test.mjs @@ -263,12 +263,12 @@ test("blendedRanking: result entries have all expected fields", () => { // ---------- stripProviderPrefix ---------- -test("stripProviderPrefix: 'kimi-coding/k2p5' → 'k2p5'", () => { - assert.equal(stripProviderPrefix("kimi-coding/k2p5"), "k2p5"); +test("stripProviderPrefix: 'kimi-coding/kimi-for-coding' → 'kimi-k2.6'", () => { + assert.equal(stripProviderPrefix("kimi-coding/kimi-for-coding"), "kimi-k2.6"); }); -test("stripProviderPrefix: 'k2p5' (no prefix) → 'k2p5'", () => { - assert.equal(stripProviderPrefix("k2p5"), "k2p5"); +test("stripProviderPrefix: 'kimi-k2.5' (no prefix) → 'kimi-k2.5'", () => { + assert.equal(stripProviderPrefix("kimi-k2.5"), "kimi-k2.5"); }); test("stripProviderPrefix: 'ollama-cloud/qwen3-coder:480b' → 'qwen3-coder:480b'", () => { diff --git a/src/resources/extensions/sf/learning/data/model-benchmarks.json b/src/resources/extensions/sf/learning/data/model-benchmarks.json index 40b92dfc0..cb4e8855d 100644 --- a/src/resources/extensions/sf/learning/data/model-benchmarks.json +++ b/src/resources/extensions/sf/learning/data/model-benchmarks.json @@ -189,7 +189,7 @@ "long_context_ruler": null, "arena_elo": null, "instruction_following": null, - "source": "Moonshot Kimi K2.5 semantic benchmark key; Kimi Code wire ID is k2p5", + "source": "Moonshot Kimi K2.5 semantic benchmark key; provider routes should use real K2.5 model IDs", "context_window": 262144, "max_output_tokens": 32768 }, diff --git a/src/resources/extensions/sf/learning/fallback-chain-writer.mjs b/src/resources/extensions/sf/learning/fallback-chain-writer.mjs index c2e633752..d9ba7a0c5 100644 --- a/src/resources/extensions/sf/learning/fallback-chain-writer.mjs +++ b/src/resources/extensions/sf/learning/fallback-chain-writer.mjs @@ -155,8 +155,9 @@ function splitProviderModel(fullModelId) { * `glm-5`) into concrete pi-ai FallbackChainEntry records. * * Example: - * enabledModels = ["kimi-coding/k2p5", "opencode-go/k2p5", "zai/glm-5"] - * → { kimi-k2.5: [{provider:"kimi-coding", model:"k2p5"}, {provider:"opencode-go", model:"k2p5"}], + * enabledModels = ["kimi-coding/kimi-for-coding", "opencode-go/kimi-k2.5", "zai/glm-5"] + * → { kimi-k2.6: [{provider:"kimi-coding", model:"kimi-for-coding"}], + * kimi-k2.5: [{provider:"opencode-go", model:"kimi-k2.5"}], * glm-5: [{provider:"zai", model:"glm-5"}] } * * Matching is case-sensitive. Ollama-cloud style IDs with `:cloud` suffix @@ -171,7 +172,7 @@ const BENCHMARK_INDEX_ALIASES = Object.freeze({ "kimi-k2.6:cloud": "kimi-k2.6", "kimi-k2.6-cloud": "kimi-k2.6", "moonshotai/kimi-k2.6": "kimi-k2.6", - k2p5: "kimi-k2.5", + "kimi-k2.5": "kimi-k2.5", "kimi-k2.5:cloud": "kimi-k2.5", "kimi-k2.5-cloud": "kimi-k2.5", "moonshotai/kimi-k2.5": "kimi-k2.5", @@ -254,7 +255,7 @@ function readEnabledModels(settingsPath) { * FallbackChainEntry records. For each rank position, emits one entry per * concrete (provider, model) pair that matches the benchmark key. * - * - Pre-prefixed IDs (`kimi-coding/k2p5`) produce exactly one entry. + * - Pre-prefixed IDs (`kimi-coding/kimi-for-coding`) produce exactly one entry. * - Semantic IDs (`kimi-k2.5`, `glm-5`) produce one entry per provider offering * that model or a known wire alias in `enabledModels` — so a model available via multiple * providers automatically becomes multiple parallel fallback options diff --git a/src/resources/extensions/sf/learning/fallback-chain-writer.test.mjs b/src/resources/extensions/sf/learning/fallback-chain-writer.test.mjs index bb3e21523..b4e270983 100644 --- a/src/resources/extensions/sf/learning/fallback-chain-writer.test.mjs +++ b/src/resources/extensions/sf/learning/fallback-chain-writer.test.mjs @@ -56,7 +56,7 @@ test("writeFallbackChains produces entries with integer priorities (no undefined const { dir, settingsPath } = makeTempSettingsDir(); try { const overrides = { - "kimi-coding/k2p5": { reasoning: 90 }, + "kimi-coding/kimi-for-coding": { reasoning: 90 }, "minimax/MiniMax-M2.7": { reasoning: 80 }, "zai/glm-5.1": { reasoning: 70 }, }; @@ -178,8 +178,8 @@ test("writeFallbackChains expands semantic benchmark keys into concrete provider JSON.stringify( { enabledModels: [ - "kimi-coding/k2p5", - "opencode-go/k2p5", + "kimi-coding/kimi-for-coding", + "opencode-go/kimi-k2.5", "ollama-cloud/kimi-k2.5:cloud", "zai/glm-5", "ollama-cloud/glm-5:cloud", @@ -191,9 +191,10 @@ test("writeFallbackChains expands semantic benchmark keys into concrete provider ); // Semantic-id overrides as they appear in model-benchmarks.json. - // `kimi-k2.5` exercises Kimi Code's `k2p5` wire alias and the - // `:cloud` stripped-suffix match. + // Kimi K2.6 exercises Kimi Code's wire route. Kimi K2.5 exercises + // real K2.5 provider routes and the `:cloud` stripped-suffix match. const overrides = { + "kimi-k2.6": { __benchmarks: { bench_p: 95 } }, "kimi-k2.5": { __benchmarks: { bench_p: 90 } }, "glm-5": { __benchmarks: { bench_p: 80 } }, }; @@ -214,14 +215,13 @@ test("writeFallbackChains expands semantic benchmark keys into concrete provider (e) => `${e.provider}/${e.model}`, ); - // kimi-k2.5 should expand to kimi-coding/k2p5 AND opencode-go/k2p5. assert.ok( - providerModelPairs.includes("kimi-coding/k2p5"), - "kimi-coding/k2p5 present", + providerModelPairs.includes("kimi-coding/kimi-for-coding"), + "kimi-k2.6 expanded to Kimi Code route", ); assert.ok( - providerModelPairs.includes("opencode-go/k2p5"), - "opencode-go/k2p5 present", + providerModelPairs.includes("opencode-go/kimi-k2.5"), + "kimi-k2.5 expanded to real K2.5 provider route", ); // glm-5 should expand to zai/glm-5 AND ollama-cloud/glm-5:cloud @@ -299,7 +299,7 @@ test("writeFallbackChains warns via log when project-level .sf/agent/settings.js const warnings = []; try { const deps = makeDeps({ - overrides: { "kimi-coding/k2p5": { reasoning: 90 } }, + overrides: { "kimi-coding/kimi-for-coding": { reasoning: 90 } }, log: (msg) => warnings.push(msg), }); const result = writeFallbackChains(globalSettingsPath, deps); @@ -355,7 +355,7 @@ test("hardcoded main chain coexists with blender-computed per-unit-type chains", settingsPath, JSON.stringify( { - enabledModels: ["kimi-coding/k2p5", "zai/glm-5"], + enabledModels: ["kimi-coding/kimi-for-coding", "zai/glm-5"], }, null, 2, @@ -410,7 +410,7 @@ test("writeFallbackChains does NOT warn when cwd is the parent of the global set writeFileSync( globalSettingsPath, JSON.stringify({ - enabledModels: ["kimi-coding/k2p5"], + enabledModels: ["kimi-coding/kimi-for-coding"], fallback: { enabled: true, chains: {} }, }), ); @@ -420,7 +420,7 @@ test("writeFallbackChains does NOT warn when cwd is the parent of the global set const warnings = []; try { const deps = makeDeps({ - overrides: { "kimi-coding/k2p5": { reasoning: 90 } }, + overrides: { "kimi-coding/kimi-for-coding": { reasoning: 90 } }, log: (msg) => warnings.push(msg), }); const result = writeFallbackChains(globalSettingsPath, deps); @@ -459,7 +459,7 @@ test("writeFallbackChains does NOT warn when project settings has no fallback bl const warnings = []; try { const deps = makeDeps({ - overrides: { "kimi-coding/k2p5": { reasoning: 90 } }, + overrides: { "kimi-coding/kimi-for-coding": { reasoning: 90 } }, log: (msg) => warnings.push(msg), }); const result = writeFallbackChains(globalSettingsPath, deps); diff --git a/src/resources/extensions/sf/learning/hook-handler.test.mjs b/src/resources/extensions/sf/learning/hook-handler.test.mjs index 86aa4a6af..c8def06fe 100644 --- a/src/resources/extensions/sf/learning/hook-handler.test.mjs +++ b/src/resources/extensions/sf/learning/hook-handler.test.mjs @@ -307,8 +307,8 @@ test("registerRoutingHook: registers handler + reload command and routes a simul unitType: "execute-task", unitId: "test-unit-1", classification: { tier: "primary", reason: "test", downgraded: false }, - eligibleModels: ["kimi-coding/k2p5", "minimax/MiniMax-M2.7"], - phaseConfig: { primary: "kimi-coding/k2p5", fallbacks: [] }, + eligibleModels: ["kimi-coding/kimi-for-coding", "minimax/MiniMax-M2.7"], + phaseConfig: { primary: "kimi-coding/kimi-for-coding", fallbacks: [] }, }; const ctx = { ui: { notify: () => {} }, hasUI: false }; diff --git a/src/resources/extensions/sf/learning/loadCapabilityOverrides.mjs b/src/resources/extensions/sf/learning/loadCapabilityOverrides.mjs index 1b42ac457..f87659f7d 100644 --- a/src/resources/extensions/sf/learning/loadCapabilityOverrides.mjs +++ b/src/resources/extensions/sf/learning/loadCapabilityOverrides.mjs @@ -163,7 +163,7 @@ const BENCHMARK_ID_ALIASES = Object.freeze({ "moonshotai/kimi-k2.6": "kimi-k2.6", "kimi-k2.6:cloud": "kimi-k2.6", "kimi-k2.6-cloud": "kimi-k2.6", - k2p5: "kimi-k2.5", + "kimi-k2.5": "kimi-k2.5", "moonshotai/kimi-k2.5": "kimi-k2.5", "moonshotai.kimi-k2.5": "kimi-k2.5", "kimi-k2.5:cloud": "kimi-k2.5", @@ -171,7 +171,7 @@ const BENCHMARK_ID_ALIASES = Object.freeze({ }); /** - * Strip provider prefix from a model id. `kimi-coding/k2p5` -> `k2p5`. + * Strip provider prefix from a model id. `kimi-coding/kimi-for-coding` -> `kimi-k2.6`. * * @param {string} modelId * @returns {string} diff --git a/src/resources/extensions/sf/learning/loadCapabilityOverrides.test.mjs b/src/resources/extensions/sf/learning/loadCapabilityOverrides.test.mjs index c545a2dcc..55e4e8ed9 100644 --- a/src/resources/extensions/sf/learning/loadCapabilityOverrides.test.mjs +++ b/src/resources/extensions/sf/learning/loadCapabilityOverrides.test.mjs @@ -268,20 +268,20 @@ test("loadCapabilityOverrides: computeUnitTypeScore resolves provider wire ids t const { overrides, weights } = await loadCapabilityOverrides(); assert.ok(overrides["kimi-k2.5"], "canonical Kimi K2.5 benchmark key exists"); assert.equal( - overrides.k2p5, + overrides["kimi-coding/kimi-for-coding"], undefined, - "k2p5 is a wire alias, not a benchmark key", + "Kimi Code provider route is not a benchmark key", ); const prefixed = computeUnitTypeScore( - "kimi-coding/k2p5", + "kimi-coding/kimi-for-coding", "execute-task", overrides, weights, ); - const wire = computeUnitTypeScore("k2p5", "execute-task", overrides, weights); + const wire = computeUnitTypeScore("kimi-for-coding", "execute-task", overrides, weights); const semantic = computeUnitTypeScore( - "kimi-k2.5", + "kimi-k2.6", "execute-task", overrides, weights, @@ -289,7 +289,7 @@ test("loadCapabilityOverrides: computeUnitTypeScore resolves provider wire ids t assert.strictEqual( prefixed, semantic, - "provider wire id resolves to semantic Kimi K2.5 score", + "provider wire id resolves to semantic Kimi K2.6 score", ); assert.strictEqual( wire, diff --git a/src/resources/extensions/sf/learning/outcome-aggregator.mjs b/src/resources/extensions/sf/learning/outcome-aggregator.mjs index d08c1894a..813169363 100644 --- a/src/resources/extensions/sf/learning/outcome-aggregator.mjs +++ b/src/resources/extensions/sf/learning/outcome-aggregator.mjs @@ -172,7 +172,7 @@ function rowToStats(row, modelId, unitType, windowDays) { * @returns {AggregatedStats} * * @example - * const stats = aggregateOutcomes(db, "kimi-coding/k2p5", "execute-task", {rollingDays: 30}); + * const stats = aggregateOutcomes(db, "kimi-coding/kimi-for-coding", "execute-task", {rollingDays: 30}); * // {modelId, unitType, sample_count: 12, success_rate: 0.83, ...} */ export function aggregateOutcomes(db, modelId, unitType, opts = {}) { diff --git a/src/resources/extensions/sf/learning/outcome-recorder.mjs b/src/resources/extensions/sf/learning/outcome-recorder.mjs index d86a12acf..56d1b2e77 100644 --- a/src/resources/extensions/sf/learning/outcome-recorder.mjs +++ b/src/resources/extensions/sf/learning/outcome-recorder.mjs @@ -57,7 +57,7 @@ const INSERT_SQL = ` * Validated outcome shape for insertion. * * @typedef {Object} Outcome - * @property {string} modelId e.g. "kimi-coding/k2p5" + * @property {string} modelId e.g. "kimi-coding/kimi-for-coding" * @property {string} provider e.g. "kimi-coding" * @property {string} unitType e.g. "research-slice", "execute-task" * @property {string} unitId e.g. "M001/S01" or "M001/S01/T01" @@ -79,7 +79,7 @@ const INSERT_SQL = ` * @returns {{valid: boolean, errors: string[]}} * * @example - * const r = validateOutcome({modelId: "k2p5", provider: "kimi", unitType: "execute-task", unitId: "M001/S01/T01", succeeded: true}); + * const r = validateOutcome({modelId: "kimi-k2.5", provider: "kimi", unitType: "execute-task", unitId: "M001/S01/T01", succeeded: true}); * // r.valid === true */ export function validateOutcome(outcome) { @@ -206,7 +206,7 @@ function buildInsertParams(outcome) { * * @example * recordOutcome(db, { - * modelId: "kimi-coding/k2p5", + * modelId: "kimi-coding/kimi-for-coding", * provider: "kimi-coding", * unitType: "execute-task", * unitId: "M001/S01/T01", diff --git a/src/resources/extensions/sf/learning/outcome-recorder.test.mjs b/src/resources/extensions/sf/learning/outcome-recorder.test.mjs index 40612af24..d834df2b8 100644 --- a/src/resources/extensions/sf/learning/outcome-recorder.test.mjs +++ b/src/resources/extensions/sf/learning/outcome-recorder.test.mjs @@ -229,7 +229,7 @@ function runRecentSelect(sql, params, rows) { function minimalOutcome(overrides = {}) { return { - modelId: "kimi-coding/k2p5", + modelId: "kimi-coding/kimi-for-coding", provider: "kimi-coding", unitType: "execute-task", unitId: "M001/S01/T01", @@ -423,7 +423,7 @@ test("aggregateOutcomes computes success_rate correctly from multiple rows", () minimalOutcome({ succeeded: false, recorded_at: now - 1000 }), ); - const stats = aggregateOutcomes(db, "kimi-coding/k2p5", "execute-task", { + const stats = aggregateOutcomes(db, "kimi-coding/kimi-for-coding", "execute-task", { now, }); assert.equal(stats.sample_count, 4); @@ -445,7 +445,7 @@ test("aggregateOutcomes excludes rows outside the rolling window", () => { minimalOutcome({ succeeded: false, recorded_at: now - 60 * oneDayMs }), ); - const stats = aggregateOutcomes(db, "kimi-coding/k2p5", "execute-task", { + const stats = aggregateOutcomes(db, "kimi-coding/kimi-for-coding", "execute-task", { now, rollingDays: 30, }); @@ -460,7 +460,7 @@ test("aggregateOutcomes verification_pass_rate is null when no verification data db, minimalOutcome({ verification_passed: null, recorded_at: now - 1000 }), ); - const stats = aggregateOutcomes(db, "kimi-coding/k2p5", "execute-task", { + const stats = aggregateOutcomes(db, "kimi-coding/kimi-for-coding", "execute-task", { now, }); assert.equal(stats.verification_pass_rate, null); diff --git a/src/resources/extensions/sf/metrics.ts b/src/resources/extensions/sf/metrics.ts index 39c25cf6a..114470eba 100644 --- a/src/resources/extensions/sf/metrics.ts +++ b/src/resources/extensions/sf/metrics.ts @@ -42,7 +42,6 @@ function inferProviderFromBareModelId(modelId: string): string { const lower = modelId.toLowerCase(); if ( lower === "kimi-for-coding" || - lower === "k2p5" || lower === "kimi-k2-thinking" ) return "kimi-coding"; diff --git a/src/resources/extensions/sf/model-identity.ts b/src/resources/extensions/sf/model-identity.ts index ca57af839..14dfb5d33 100644 --- a/src/resources/extensions/sf/model-identity.ts +++ b/src/resources/extensions/sf/model-identity.ts @@ -33,8 +33,6 @@ export function normalizedModelName(model: { ) return "Kimi K2.6"; if ( - (provider === "kimi-coding" && id === "k2p5") || - id === "k2p5" || id === "kimi-k2.5" || id === "kimi-k2.5:cloud" || id === "moonshotai/kimi-k2.5" diff --git a/src/resources/extensions/sf/tests/auto-model-selection.test.ts b/src/resources/extensions/sf/tests/auto-model-selection.test.ts index 7530e1df1..16a56e110 100644 --- a/src/resources/extensions/sf/tests/auto-model-selection.test.ts +++ b/src/resources/extensions/sf/tests/auto-model-selection.test.ts @@ -426,17 +426,16 @@ test("resolveModelId: bare Kimi K2.6 can resolve Ollama Cloud exact version when assert.equal(result.id, "kimi-k2.6"); }); -test("resolveModelId: bare Kimi K2.5 IDs do not alias to K2.6", () => { +test("resolveModelId: bare Kimi K2.5 IDs do not route to Kimi Code K2.6", () => { const availableModels = [ { id: "kimi-for-coding", provider: "kimi-coding" }, - { id: "k2p5", provider: "kimi-coding" }, { id: "kimi-k2.5", provider: "opencode-go" }, ]; const result = resolveModelId("kimi-k2.5", availableModels, "opencode-go"); assert.ok(result, "should resolve a Kimi model"); - assert.equal(result.provider, "kimi-coding"); - assert.equal(result.id, "k2p5"); + assert.equal(result.provider, "opencode-go"); + assert.equal(result.id, "kimi-k2.5"); }); test("resolveModelId: bare Kimi K2.5 only resolves exact K2.5 aliases", () => { @@ -451,32 +450,30 @@ test("resolveModelId: bare Kimi K2.5 only resolves exact K2.5 aliases", () => { assert.equal(result.id, "kimi-k2.5"); }); -test("resolveModelId: bare k2p5 prefers direct Kimi Code when available", () => { +test("resolveModelId: bare Kimi K2.5 still uses a real K2.5 provider when Kimi Code is preferred", () => { const availableModels = [ - { id: "k2p5", provider: "kimi-coding" }, + { id: "kimi-for-coding", provider: "kimi-coding" }, { id: "kimi-k2.5", provider: "opencode-go" }, ]; - const result = resolveModelId("k2p5", availableModels, "kimi-coding"); + const result = resolveModelId("kimi-k2.5", availableModels, "kimi-coding"); assert.ok(result, "should resolve a real K2.5 model"); - assert.equal(result.provider, "kimi-coding"); - assert.equal(result.id, "k2p5"); + assert.equal(result.provider, "opencode-go"); + assert.equal(result.id, "kimi-k2.5"); }); -test("resolveModelId: explicit Kimi Code k2p5 route stays on K2.5 wire id", () => { +test("resolveModelId: explicit provider Kimi K2.5 route does not alias to K2.6", () => { const availableModels = [ - { id: "k2p5", provider: "kimi-coding" }, { id: "kimi-for-coding", provider: "kimi-coding" }, + { id: "kimi-k2.5", provider: "opencode-go" }, ]; const result = resolveModelId( - "kimi-coding/k2p5", + "kimi-coding/kimi-k2.5", availableModels, "kimi-coding", ); - assert.ok(result, "should resolve Kimi Code K2.5 wire route"); - assert.equal(result.provider, "kimi-coding"); - assert.equal(result.id, "k2p5"); + assert.equal(result, undefined); }); test("resolveModelId: bare MiniMax IDs prefer minimax over minimax-cn and aggregators", () => { diff --git a/src/resources/extensions/sf/tests/benchmark-selector.test.ts b/src/resources/extensions/sf/tests/benchmark-selector.test.ts index 7f7ea926c..4525fd754 100644 --- a/src/resources/extensions/sf/tests/benchmark-selector.test.ts +++ b/src/resources/extensions/sf/tests/benchmark-selector.test.ts @@ -443,7 +443,7 @@ describe("benchmark-selector", () => { aime_2026: 99, gpqa: 99, }, - k2p5: { + "kimi-for-coding": { swe_bench: 1, live_code_bench: 1, human_eval: 1, @@ -454,15 +454,15 @@ describe("benchmark-selector", () => { const r = selectByBenchmarks( "execute-task", [ - { provider: "kimi-coding", id: "k2p5" }, + { provider: "opencode-go", id: "kimi-k2.5" }, { provider: "openrouter", id: "moonshotai/kimi-k2.5" }, ], { benchmarks: fixture }, ); assert.ok(r); assert.ok( - r.scores["kimi-coding/k2p5"] > 90, - "expected Kimi Code k2p5 route to score from kimi-k2.5", + r.scores["opencode-go/kimi-k2.5"] > 90, + "expected provider Kimi K2.5 route to score from kimi-k2.5", ); assert.ok( r.scores["openrouter/moonshotai/kimi-k2.5"] > 90, diff --git a/src/resources/extensions/sf/tests/model-identity.test.ts b/src/resources/extensions/sf/tests/model-identity.test.ts index ecb2a1c30..8533873f8 100644 --- a/src/resources/extensions/sf/tests/model-identity.test.ts +++ b/src/resources/extensions/sf/tests/model-identity.test.ts @@ -25,12 +25,6 @@ test("model identity: Ollama Cloud live K2.6 id displays as Kimi K2.6", () => { ); }); -test("model identity: Kimi Code k2p5 wire route displays as K2.5", () => { - const model = { provider: "kimi-coding", id: "k2p5", name: "Kimi K2.5" }; - assert.equal(normalizedModelName(model), "Kimi K2.5"); - assert.equal(formatModelIdentity(model), "Kimi K2.5 (kimi-coding/k2p5)"); -}); - test("model identity: K2.5 remains distinct from K2.6", () => { const model = { provider: "opencode-go", id: "kimi-k2.5" }; assert.equal(normalizedModelName(model), "Kimi K2.5");