Fix Kimi Code K2.6 routing and pricing
This commit is contained in:
parent
50975c19e0
commit
1dbd30c713
21 changed files with 101 additions and 108 deletions
|
|
@ -1269,20 +1269,24 @@ async function generateModels() {
|
|||
const KIMI_CODING_BASE_URL = "https://api.kimi.com/coding";
|
||||
const kimiCodingModels: Model<"anthropic-messages">[] = [
|
||||
{
|
||||
id: "kimi-k2-thinking",
|
||||
name: "Kimi K2 Thinking",
|
||||
id: "kimi-for-coding",
|
||||
name: "Kimi K2.6",
|
||||
api: "anthropic-messages",
|
||||
provider: "kimi-coding",
|
||||
baseUrl: KIMI_CODING_BASE_URL,
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
input: ["text", "image"],
|
||||
capabilities: { thinkingNoBudget: true },
|
||||
// Kimi Code is subscription-backed, but SF ranking uses the normal
|
||||
// pay-as-you-go Kimi K2.6 market price to compare models fairly.
|
||||
// Source: OpenRouter moonshotai/kimi-k2.6, 2026-04-30.
|
||||
cost: { input: 0.7448, output: 4.655, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 262144,
|
||||
maxTokens: 32768,
|
||||
},
|
||||
{
|
||||
id: "k2p5",
|
||||
name: "Kimi K2.5",
|
||||
id: "kimi-k2-thinking",
|
||||
name: "Kimi K2 Thinking",
|
||||
api: "anthropic-messages",
|
||||
provider: "kimi-coding",
|
||||
baseUrl: KIMI_CODING_BASE_URL,
|
||||
|
|
|
|||
|
|
@ -4626,26 +4626,8 @@ export const MODELS = {
|
|||
input: ["text", "image"],
|
||||
capabilities: { thinkingNoBudget: true },
|
||||
cost: {
|
||||
input: 0.6,
|
||||
output: 2.5,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 32768,
|
||||
} satisfies Model<"anthropic-messages">,
|
||||
"k2p5": {
|
||||
id: "k2p5",
|
||||
name: "Kimi K2.5",
|
||||
api: "anthropic-messages",
|
||||
provider: "kimi-coding",
|
||||
baseUrl: "https://api.kimi.com/coding",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
capabilities: { thinkingNoBudget: true },
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
input: 0.7448,
|
||||
output: 4.655,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -122,6 +122,7 @@ describe("model registry — xiaomi provider", () => {
|
|||
const models = getModels("xiaomi" as any);
|
||||
const ids = models.map((m) => m.id).sort();
|
||||
assert.deepEqual(ids, [
|
||||
"mimo-v2-flash",
|
||||
"mimo-v2-omni",
|
||||
"mimo-v2-pro",
|
||||
"mimo-v2.5",
|
||||
|
|
@ -165,6 +166,14 @@ describe("model registry — kimi-coding provider", () => {
|
|||
assert.equal(model.baseUrl, "https://api.kimi.com/coding");
|
||||
assert.equal(model.contextWindow, 262144);
|
||||
});
|
||||
|
||||
it("kimi-coding uses market comparison pricing for Kimi K2.6", () => {
|
||||
const model = getModel("kimi-coding" as any, "kimi-for-coding" as any);
|
||||
assert.ok(model, "Expected getModel to return kimi-coding/kimi-for-coding");
|
||||
assert.equal(model.name, "Kimi K2.6");
|
||||
assert.equal(model.cost.input, 0.7448);
|
||||
assert.equal(model.cost.output, 4.655);
|
||||
});
|
||||
});
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ const BARE_MODEL_FAMILY_PRIORITY: Array<{
|
|||
}> = [
|
||||
{ match: /^glm-/i, providers: ["zai", "opencode", "opencode-go"] },
|
||||
{
|
||||
match: /^kimi-|^k2p5$/i,
|
||||
match: /^kimi-/i,
|
||||
providers: ["kimi-coding", "ollama-cloud", "opencode", "opencode-go"],
|
||||
},
|
||||
{ match: /^MiniMax-|^minimax-/i, providers: ["minimax", "minimax-cn"] },
|
||||
|
|
@ -176,12 +176,11 @@ function preferredBareModelIds(modelId: string): readonly string[] | undefined {
|
|||
];
|
||||
}
|
||||
if (
|
||||
lower === "k2p5" ||
|
||||
lower === "kimi-k2.5" ||
|
||||
lower === "kimi-k2.5:cloud" ||
|
||||
lower === "moonshotai/kimi-k2.5"
|
||||
) {
|
||||
return ["kimi-k2.5", "moonshotai/kimi-k2.5", "kimi-k2.5:cloud", "k2p5"];
|
||||
return ["kimi-k2.5", "moonshotai/kimi-k2.5", "kimi-k2.5:cloud"];
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
|
@ -227,12 +226,10 @@ function bareModelIdAliases(modelId: string): Set<string> {
|
|||
aliases.add("moonshotai/kimi-k2.6");
|
||||
}
|
||||
if (
|
||||
lower === "k2p5" ||
|
||||
lower === "kimi-k2.5" ||
|
||||
lower === "kimi-k2.5:cloud" ||
|
||||
lower === "moonshotai/kimi-k2.5"
|
||||
) {
|
||||
aliases.add("k2p5");
|
||||
aliases.add("kimi-k2.5");
|
||||
aliases.add("kimi-k2.5:cloud");
|
||||
aliases.add("moonshotai/kimi-k2.5");
|
||||
|
|
|
|||
|
|
@ -365,8 +365,8 @@ const BENCHMARK_KEY_ALIASES: Record<string, string> = {
|
|||
"moonshotai/kimi-k2.6": "kimi-k2.6",
|
||||
"kimi-k2.6:cloud": "kimi-k2.6",
|
||||
"kimi-k2.6-cloud": "kimi-k2.6",
|
||||
// Kimi Code / aggregator wire IDs. The benchmark identity is Kimi K2.5.
|
||||
k2p5: "kimi-k2.5",
|
||||
// Kimi aggregator wire IDs. Kimi Code's `kimi-for-coding` is K2.6 above.
|
||||
"kimi-k2.5": "kimi-k2.5",
|
||||
"moonshotai/kimi-k2.5": "kimi-k2.5",
|
||||
"moonshotai.kimi-k2.5": "kimi-k2.5",
|
||||
"kimi-k2.5:cloud": "kimi-k2.5",
|
||||
|
|
|
|||
|
|
@ -33,6 +33,17 @@ const NEUTRAL_OBSERVED_SCORE = 50;
|
|||
const SCORE_SCALE = 100;
|
||||
const UNTRIED_MODEL_BONUS = 1000;
|
||||
const DEFAULT_MAX_RETRIES = 5;
|
||||
const BENCHMARK_ID_ALIASES = Object.freeze({
|
||||
"kimi-for-coding": "kimi-k2.6",
|
||||
"moonshotai/kimi-k2.6": "kimi-k2.6",
|
||||
"kimi-k2.6:cloud": "kimi-k2.6",
|
||||
"kimi-k2.6-cloud": "kimi-k2.6",
|
||||
"kimi-k2.5": "kimi-k2.5",
|
||||
"moonshotai/kimi-k2.5": "kimi-k2.5",
|
||||
"moonshotai.kimi-k2.5": "kimi-k2.5",
|
||||
"kimi-k2.5:cloud": "kimi-k2.5",
|
||||
"kimi-k2.5-cloud": "kimi-k2.5",
|
||||
});
|
||||
|
||||
/**
|
||||
* Core blend: α · prior + (1 - α) · observed
|
||||
|
|
@ -151,7 +162,7 @@ export function computeObservedScore(
|
|||
/**
|
||||
* Full ranking of eligible models for a unit type.
|
||||
*
|
||||
* @param {string[]} eligibleModels - e.g. ["kimi-coding/k2p5", "minimax/MiniMax-M2.7"]
|
||||
* @param {string[]} eligibleModels - e.g. ["kimi-coding/kimi-for-coding", "minimax/MiniMax-M2.7"]
|
||||
* @param {string} unitType - e.g. "execute-task" (currently informational)
|
||||
* @param {Object} priorsByModel - {modelId: priorScore (0-100)} — from loadCapabilityOverrides
|
||||
* @param {Object} observedByModel - {modelId: AggregatedStats} — from outcome-aggregator
|
||||
|
|
@ -218,8 +229,8 @@ export function blendedRanking(
|
|||
|
||||
/**
|
||||
* Helper: map a model id to its benchmark lookup key — strip any provider prefix, then canonicalize known wire aliases.
|
||||
* "kimi-coding/k2p5" → "k2p5"
|
||||
* "k2p5" → "k2p5"
|
||||
* "kimi-coding/kimi-for-coding" → "kimi-k2.6"
|
||||
* "kimi-k2.5" → "kimi-k2.5"
|
||||
* "ollama-cloud/qwen3-coder:480b" → "qwen3-coder:480b"
|
||||
*
|
||||
* @param {string} modelId
|
||||
|
|
@ -228,7 +239,8 @@ export function blendedRanking(
|
|||
export function stripProviderPrefix(modelId) {
|
||||
const slashIndex = modelId.indexOf("/");
|
||||
if (slashIndex === -1) {
|
||||
return modelId;
|
||||
return BENCHMARK_ID_ALIASES[modelId.toLowerCase()] ?? modelId;
|
||||
}
|
||||
return modelId.slice(slashIndex + 1);
|
||||
const stripped = modelId.slice(slashIndex + 1);
|
||||
return BENCHMARK_ID_ALIASES[stripped.toLowerCase()] ?? stripped;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -263,12 +263,12 @@ test("blendedRanking: result entries have all expected fields", () => {
|
|||
|
||||
// ---------- stripProviderPrefix ----------
|
||||
|
||||
test("stripProviderPrefix: 'kimi-coding/k2p5' → 'k2p5'", () => {
|
||||
assert.equal(stripProviderPrefix("kimi-coding/k2p5"), "k2p5");
|
||||
test("stripProviderPrefix: 'kimi-coding/kimi-for-coding' → 'kimi-k2.6'", () => {
|
||||
assert.equal(stripProviderPrefix("kimi-coding/kimi-for-coding"), "kimi-k2.6");
|
||||
});
|
||||
|
||||
test("stripProviderPrefix: 'k2p5' (no prefix) → 'k2p5'", () => {
|
||||
assert.equal(stripProviderPrefix("k2p5"), "k2p5");
|
||||
test("stripProviderPrefix: 'kimi-k2.5' (no prefix) → 'kimi-k2.5'", () => {
|
||||
assert.equal(stripProviderPrefix("kimi-k2.5"), "kimi-k2.5");
|
||||
});
|
||||
|
||||
test("stripProviderPrefix: 'ollama-cloud/qwen3-coder:480b' → 'qwen3-coder:480b'", () => {
|
||||
|
|
|
|||
|
|
@ -189,7 +189,7 @@
|
|||
"long_context_ruler": null,
|
||||
"arena_elo": null,
|
||||
"instruction_following": null,
|
||||
"source": "Moonshot Kimi K2.5 semantic benchmark key; Kimi Code wire ID is k2p5",
|
||||
"source": "Moonshot Kimi K2.5 semantic benchmark key; provider routes should use real K2.5 model IDs",
|
||||
"context_window": 262144,
|
||||
"max_output_tokens": 32768
|
||||
},
|
||||
|
|
|
|||
|
|
@ -155,8 +155,9 @@ function splitProviderModel(fullModelId) {
|
|||
* `glm-5`) into concrete pi-ai FallbackChainEntry records.
|
||||
*
|
||||
* Example:
|
||||
* enabledModels = ["kimi-coding/k2p5", "opencode-go/k2p5", "zai/glm-5"]
|
||||
* → { kimi-k2.5: [{provider:"kimi-coding", model:"k2p5"}, {provider:"opencode-go", model:"k2p5"}],
|
||||
* enabledModels = ["kimi-coding/kimi-for-coding", "opencode-go/kimi-k2.5", "zai/glm-5"]
|
||||
* → { kimi-k2.6: [{provider:"kimi-coding", model:"kimi-for-coding"}],
|
||||
* kimi-k2.5: [{provider:"opencode-go", model:"kimi-k2.5"}],
|
||||
* glm-5: [{provider:"zai", model:"glm-5"}] }
|
||||
*
|
||||
* Matching is case-sensitive. Ollama-cloud style IDs with `:cloud` suffix
|
||||
|
|
@ -171,7 +172,7 @@ const BENCHMARK_INDEX_ALIASES = Object.freeze({
|
|||
"kimi-k2.6:cloud": "kimi-k2.6",
|
||||
"kimi-k2.6-cloud": "kimi-k2.6",
|
||||
"moonshotai/kimi-k2.6": "kimi-k2.6",
|
||||
k2p5: "kimi-k2.5",
|
||||
"kimi-k2.5": "kimi-k2.5",
|
||||
"kimi-k2.5:cloud": "kimi-k2.5",
|
||||
"kimi-k2.5-cloud": "kimi-k2.5",
|
||||
"moonshotai/kimi-k2.5": "kimi-k2.5",
|
||||
|
|
@ -254,7 +255,7 @@ function readEnabledModels(settingsPath) {
|
|||
* FallbackChainEntry records. For each rank position, emits one entry per
|
||||
* concrete (provider, model) pair that matches the benchmark key.
|
||||
*
|
||||
* - Pre-prefixed IDs (`kimi-coding/k2p5`) produce exactly one entry.
|
||||
* - Pre-prefixed IDs (`kimi-coding/kimi-for-coding`) produce exactly one entry.
|
||||
* - Semantic IDs (`kimi-k2.5`, `glm-5`) produce one entry per provider offering
|
||||
* that model or a known wire alias in `enabledModels` — so a model available via multiple
|
||||
* providers automatically becomes multiple parallel fallback options
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ test("writeFallbackChains produces entries with integer priorities (no undefined
|
|||
const { dir, settingsPath } = makeTempSettingsDir();
|
||||
try {
|
||||
const overrides = {
|
||||
"kimi-coding/k2p5": { reasoning: 90 },
|
||||
"kimi-coding/kimi-for-coding": { reasoning: 90 },
|
||||
"minimax/MiniMax-M2.7": { reasoning: 80 },
|
||||
"zai/glm-5.1": { reasoning: 70 },
|
||||
};
|
||||
|
|
@ -178,8 +178,8 @@ test("writeFallbackChains expands semantic benchmark keys into concrete provider
|
|||
JSON.stringify(
|
||||
{
|
||||
enabledModels: [
|
||||
"kimi-coding/k2p5",
|
||||
"opencode-go/k2p5",
|
||||
"kimi-coding/kimi-for-coding",
|
||||
"opencode-go/kimi-k2.5",
|
||||
"ollama-cloud/kimi-k2.5:cloud",
|
||||
"zai/glm-5",
|
||||
"ollama-cloud/glm-5:cloud",
|
||||
|
|
@ -191,9 +191,10 @@ test("writeFallbackChains expands semantic benchmark keys into concrete provider
|
|||
);
|
||||
|
||||
// Semantic-id overrides as they appear in model-benchmarks.json.
|
||||
// `kimi-k2.5` exercises Kimi Code's `k2p5` wire alias and the
|
||||
// `:cloud` stripped-suffix match.
|
||||
// Kimi K2.6 exercises Kimi Code's wire route. Kimi K2.5 exercises
|
||||
// real K2.5 provider routes and the `:cloud` stripped-suffix match.
|
||||
const overrides = {
|
||||
"kimi-k2.6": { __benchmarks: { bench_p: 95 } },
|
||||
"kimi-k2.5": { __benchmarks: { bench_p: 90 } },
|
||||
"glm-5": { __benchmarks: { bench_p: 80 } },
|
||||
};
|
||||
|
|
@ -214,14 +215,13 @@ test("writeFallbackChains expands semantic benchmark keys into concrete provider
|
|||
(e) => `${e.provider}/${e.model}`,
|
||||
);
|
||||
|
||||
// kimi-k2.5 should expand to kimi-coding/k2p5 AND opencode-go/k2p5.
|
||||
assert.ok(
|
||||
providerModelPairs.includes("kimi-coding/k2p5"),
|
||||
"kimi-coding/k2p5 present",
|
||||
providerModelPairs.includes("kimi-coding/kimi-for-coding"),
|
||||
"kimi-k2.6 expanded to Kimi Code route",
|
||||
);
|
||||
assert.ok(
|
||||
providerModelPairs.includes("opencode-go/k2p5"),
|
||||
"opencode-go/k2p5 present",
|
||||
providerModelPairs.includes("opencode-go/kimi-k2.5"),
|
||||
"kimi-k2.5 expanded to real K2.5 provider route",
|
||||
);
|
||||
|
||||
// glm-5 should expand to zai/glm-5 AND ollama-cloud/glm-5:cloud
|
||||
|
|
@ -299,7 +299,7 @@ test("writeFallbackChains warns via log when project-level .sf/agent/settings.js
|
|||
const warnings = [];
|
||||
try {
|
||||
const deps = makeDeps({
|
||||
overrides: { "kimi-coding/k2p5": { reasoning: 90 } },
|
||||
overrides: { "kimi-coding/kimi-for-coding": { reasoning: 90 } },
|
||||
log: (msg) => warnings.push(msg),
|
||||
});
|
||||
const result = writeFallbackChains(globalSettingsPath, deps);
|
||||
|
|
@ -355,7 +355,7 @@ test("hardcoded main chain coexists with blender-computed per-unit-type chains",
|
|||
settingsPath,
|
||||
JSON.stringify(
|
||||
{
|
||||
enabledModels: ["kimi-coding/k2p5", "zai/glm-5"],
|
||||
enabledModels: ["kimi-coding/kimi-for-coding", "zai/glm-5"],
|
||||
},
|
||||
null,
|
||||
2,
|
||||
|
|
@ -410,7 +410,7 @@ test("writeFallbackChains does NOT warn when cwd is the parent of the global set
|
|||
writeFileSync(
|
||||
globalSettingsPath,
|
||||
JSON.stringify({
|
||||
enabledModels: ["kimi-coding/k2p5"],
|
||||
enabledModels: ["kimi-coding/kimi-for-coding"],
|
||||
fallback: { enabled: true, chains: {} },
|
||||
}),
|
||||
);
|
||||
|
|
@ -420,7 +420,7 @@ test("writeFallbackChains does NOT warn when cwd is the parent of the global set
|
|||
const warnings = [];
|
||||
try {
|
||||
const deps = makeDeps({
|
||||
overrides: { "kimi-coding/k2p5": { reasoning: 90 } },
|
||||
overrides: { "kimi-coding/kimi-for-coding": { reasoning: 90 } },
|
||||
log: (msg) => warnings.push(msg),
|
||||
});
|
||||
const result = writeFallbackChains(globalSettingsPath, deps);
|
||||
|
|
@ -459,7 +459,7 @@ test("writeFallbackChains does NOT warn when project settings has no fallback bl
|
|||
const warnings = [];
|
||||
try {
|
||||
const deps = makeDeps({
|
||||
overrides: { "kimi-coding/k2p5": { reasoning: 90 } },
|
||||
overrides: { "kimi-coding/kimi-for-coding": { reasoning: 90 } },
|
||||
log: (msg) => warnings.push(msg),
|
||||
});
|
||||
const result = writeFallbackChains(globalSettingsPath, deps);
|
||||
|
|
|
|||
|
|
@ -307,8 +307,8 @@ test("registerRoutingHook: registers handler + reload command and routes a simul
|
|||
unitType: "execute-task",
|
||||
unitId: "test-unit-1",
|
||||
classification: { tier: "primary", reason: "test", downgraded: false },
|
||||
eligibleModels: ["kimi-coding/k2p5", "minimax/MiniMax-M2.7"],
|
||||
phaseConfig: { primary: "kimi-coding/k2p5", fallbacks: [] },
|
||||
eligibleModels: ["kimi-coding/kimi-for-coding", "minimax/MiniMax-M2.7"],
|
||||
phaseConfig: { primary: "kimi-coding/kimi-for-coding", fallbacks: [] },
|
||||
};
|
||||
const ctx = { ui: { notify: () => {} }, hasUI: false };
|
||||
|
||||
|
|
|
|||
|
|
@ -163,7 +163,7 @@ const BENCHMARK_ID_ALIASES = Object.freeze({
|
|||
"moonshotai/kimi-k2.6": "kimi-k2.6",
|
||||
"kimi-k2.6:cloud": "kimi-k2.6",
|
||||
"kimi-k2.6-cloud": "kimi-k2.6",
|
||||
k2p5: "kimi-k2.5",
|
||||
"kimi-k2.5": "kimi-k2.5",
|
||||
"moonshotai/kimi-k2.5": "kimi-k2.5",
|
||||
"moonshotai.kimi-k2.5": "kimi-k2.5",
|
||||
"kimi-k2.5:cloud": "kimi-k2.5",
|
||||
|
|
@ -171,7 +171,7 @@ const BENCHMARK_ID_ALIASES = Object.freeze({
|
|||
});
|
||||
|
||||
/**
|
||||
* Strip provider prefix from a model id. `kimi-coding/k2p5` -> `k2p5`.
|
||||
* Strip provider prefix from a model id. `kimi-coding/kimi-for-coding` -> `kimi-k2.6`.
|
||||
*
|
||||
* @param {string} modelId
|
||||
* @returns {string}
|
||||
|
|
|
|||
|
|
@ -268,20 +268,20 @@ test("loadCapabilityOverrides: computeUnitTypeScore resolves provider wire ids t
|
|||
const { overrides, weights } = await loadCapabilityOverrides();
|
||||
assert.ok(overrides["kimi-k2.5"], "canonical Kimi K2.5 benchmark key exists");
|
||||
assert.equal(
|
||||
overrides.k2p5,
|
||||
overrides["kimi-coding/kimi-for-coding"],
|
||||
undefined,
|
||||
"k2p5 is a wire alias, not a benchmark key",
|
||||
"Kimi Code provider route is not a benchmark key",
|
||||
);
|
||||
|
||||
const prefixed = computeUnitTypeScore(
|
||||
"kimi-coding/k2p5",
|
||||
"kimi-coding/kimi-for-coding",
|
||||
"execute-task",
|
||||
overrides,
|
||||
weights,
|
||||
);
|
||||
const wire = computeUnitTypeScore("k2p5", "execute-task", overrides, weights);
|
||||
const wire = computeUnitTypeScore("kimi-for-coding", "execute-task", overrides, weights);
|
||||
const semantic = computeUnitTypeScore(
|
||||
"kimi-k2.5",
|
||||
"kimi-k2.6",
|
||||
"execute-task",
|
||||
overrides,
|
||||
weights,
|
||||
|
|
@ -289,7 +289,7 @@ test("loadCapabilityOverrides: computeUnitTypeScore resolves provider wire ids t
|
|||
assert.strictEqual(
|
||||
prefixed,
|
||||
semantic,
|
||||
"provider wire id resolves to semantic Kimi K2.5 score",
|
||||
"provider wire id resolves to semantic Kimi K2.6 score",
|
||||
);
|
||||
assert.strictEqual(
|
||||
wire,
|
||||
|
|
|
|||
|
|
@ -172,7 +172,7 @@ function rowToStats(row, modelId, unitType, windowDays) {
|
|||
* @returns {AggregatedStats}
|
||||
*
|
||||
* @example
|
||||
* const stats = aggregateOutcomes(db, "kimi-coding/k2p5", "execute-task", {rollingDays: 30});
|
||||
* const stats = aggregateOutcomes(db, "kimi-coding/kimi-for-coding", "execute-task", {rollingDays: 30});
|
||||
* // {modelId, unitType, sample_count: 12, success_rate: 0.83, ...}
|
||||
*/
|
||||
export function aggregateOutcomes(db, modelId, unitType, opts = {}) {
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ const INSERT_SQL = `
|
|||
* Validated outcome shape for insertion.
|
||||
*
|
||||
* @typedef {Object} Outcome
|
||||
* @property {string} modelId e.g. "kimi-coding/k2p5"
|
||||
* @property {string} modelId e.g. "kimi-coding/kimi-for-coding"
|
||||
* @property {string} provider e.g. "kimi-coding"
|
||||
* @property {string} unitType e.g. "research-slice", "execute-task"
|
||||
* @property {string} unitId e.g. "M001/S01" or "M001/S01/T01"
|
||||
|
|
@ -79,7 +79,7 @@ const INSERT_SQL = `
|
|||
* @returns {{valid: boolean, errors: string[]}}
|
||||
*
|
||||
* @example
|
||||
* const r = validateOutcome({modelId: "k2p5", provider: "kimi", unitType: "execute-task", unitId: "M001/S01/T01", succeeded: true});
|
||||
* const r = validateOutcome({modelId: "kimi-k2.5", provider: "kimi", unitType: "execute-task", unitId: "M001/S01/T01", succeeded: true});
|
||||
* // r.valid === true
|
||||
*/
|
||||
export function validateOutcome(outcome) {
|
||||
|
|
@ -206,7 +206,7 @@ function buildInsertParams(outcome) {
|
|||
*
|
||||
* @example
|
||||
* recordOutcome(db, {
|
||||
* modelId: "kimi-coding/k2p5",
|
||||
* modelId: "kimi-coding/kimi-for-coding",
|
||||
* provider: "kimi-coding",
|
||||
* unitType: "execute-task",
|
||||
* unitId: "M001/S01/T01",
|
||||
|
|
|
|||
|
|
@ -229,7 +229,7 @@ function runRecentSelect(sql, params, rows) {
|
|||
|
||||
function minimalOutcome(overrides = {}) {
|
||||
return {
|
||||
modelId: "kimi-coding/k2p5",
|
||||
modelId: "kimi-coding/kimi-for-coding",
|
||||
provider: "kimi-coding",
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
|
|
@ -423,7 +423,7 @@ test("aggregateOutcomes computes success_rate correctly from multiple rows", ()
|
|||
minimalOutcome({ succeeded: false, recorded_at: now - 1000 }),
|
||||
);
|
||||
|
||||
const stats = aggregateOutcomes(db, "kimi-coding/k2p5", "execute-task", {
|
||||
const stats = aggregateOutcomes(db, "kimi-coding/kimi-for-coding", "execute-task", {
|
||||
now,
|
||||
});
|
||||
assert.equal(stats.sample_count, 4);
|
||||
|
|
@ -445,7 +445,7 @@ test("aggregateOutcomes excludes rows outside the rolling window", () => {
|
|||
minimalOutcome({ succeeded: false, recorded_at: now - 60 * oneDayMs }),
|
||||
);
|
||||
|
||||
const stats = aggregateOutcomes(db, "kimi-coding/k2p5", "execute-task", {
|
||||
const stats = aggregateOutcomes(db, "kimi-coding/kimi-for-coding", "execute-task", {
|
||||
now,
|
||||
rollingDays: 30,
|
||||
});
|
||||
|
|
@ -460,7 +460,7 @@ test("aggregateOutcomes verification_pass_rate is null when no verification data
|
|||
db,
|
||||
minimalOutcome({ verification_passed: null, recorded_at: now - 1000 }),
|
||||
);
|
||||
const stats = aggregateOutcomes(db, "kimi-coding/k2p5", "execute-task", {
|
||||
const stats = aggregateOutcomes(db, "kimi-coding/kimi-for-coding", "execute-task", {
|
||||
now,
|
||||
});
|
||||
assert.equal(stats.verification_pass_rate, null);
|
||||
|
|
|
|||
|
|
@ -42,7 +42,6 @@ function inferProviderFromBareModelId(modelId: string): string {
|
|||
const lower = modelId.toLowerCase();
|
||||
if (
|
||||
lower === "kimi-for-coding" ||
|
||||
lower === "k2p5" ||
|
||||
lower === "kimi-k2-thinking"
|
||||
)
|
||||
return "kimi-coding";
|
||||
|
|
|
|||
|
|
@ -33,8 +33,6 @@ export function normalizedModelName(model: {
|
|||
)
|
||||
return "Kimi K2.6";
|
||||
if (
|
||||
(provider === "kimi-coding" && id === "k2p5") ||
|
||||
id === "k2p5" ||
|
||||
id === "kimi-k2.5" ||
|
||||
id === "kimi-k2.5:cloud" ||
|
||||
id === "moonshotai/kimi-k2.5"
|
||||
|
|
|
|||
|
|
@ -426,17 +426,16 @@ test("resolveModelId: bare Kimi K2.6 can resolve Ollama Cloud exact version when
|
|||
assert.equal(result.id, "kimi-k2.6");
|
||||
});
|
||||
|
||||
test("resolveModelId: bare Kimi K2.5 IDs do not alias to K2.6", () => {
|
||||
test("resolveModelId: bare Kimi K2.5 IDs do not route to Kimi Code K2.6", () => {
|
||||
const availableModels = [
|
||||
{ id: "kimi-for-coding", provider: "kimi-coding" },
|
||||
{ id: "k2p5", provider: "kimi-coding" },
|
||||
{ id: "kimi-k2.5", provider: "opencode-go" },
|
||||
];
|
||||
|
||||
const result = resolveModelId("kimi-k2.5", availableModels, "opencode-go");
|
||||
assert.ok(result, "should resolve a Kimi model");
|
||||
assert.equal(result.provider, "kimi-coding");
|
||||
assert.equal(result.id, "k2p5");
|
||||
assert.equal(result.provider, "opencode-go");
|
||||
assert.equal(result.id, "kimi-k2.5");
|
||||
});
|
||||
|
||||
test("resolveModelId: bare Kimi K2.5 only resolves exact K2.5 aliases", () => {
|
||||
|
|
@ -451,32 +450,30 @@ test("resolveModelId: bare Kimi K2.5 only resolves exact K2.5 aliases", () => {
|
|||
assert.equal(result.id, "kimi-k2.5");
|
||||
});
|
||||
|
||||
test("resolveModelId: bare k2p5 prefers direct Kimi Code when available", () => {
|
||||
test("resolveModelId: bare Kimi K2.5 still uses a real K2.5 provider when Kimi Code is preferred", () => {
|
||||
const availableModels = [
|
||||
{ id: "k2p5", provider: "kimi-coding" },
|
||||
{ id: "kimi-for-coding", provider: "kimi-coding" },
|
||||
{ id: "kimi-k2.5", provider: "opencode-go" },
|
||||
];
|
||||
|
||||
const result = resolveModelId("k2p5", availableModels, "kimi-coding");
|
||||
const result = resolveModelId("kimi-k2.5", availableModels, "kimi-coding");
|
||||
assert.ok(result, "should resolve a real K2.5 model");
|
||||
assert.equal(result.provider, "kimi-coding");
|
||||
assert.equal(result.id, "k2p5");
|
||||
assert.equal(result.provider, "opencode-go");
|
||||
assert.equal(result.id, "kimi-k2.5");
|
||||
});
|
||||
|
||||
test("resolveModelId: explicit Kimi Code k2p5 route stays on K2.5 wire id", () => {
|
||||
test("resolveModelId: explicit provider Kimi K2.5 route does not alias to K2.6", () => {
|
||||
const availableModels = [
|
||||
{ id: "k2p5", provider: "kimi-coding" },
|
||||
{ id: "kimi-for-coding", provider: "kimi-coding" },
|
||||
{ id: "kimi-k2.5", provider: "opencode-go" },
|
||||
];
|
||||
|
||||
const result = resolveModelId(
|
||||
"kimi-coding/k2p5",
|
||||
"kimi-coding/kimi-k2.5",
|
||||
availableModels,
|
||||
"kimi-coding",
|
||||
);
|
||||
assert.ok(result, "should resolve Kimi Code K2.5 wire route");
|
||||
assert.equal(result.provider, "kimi-coding");
|
||||
assert.equal(result.id, "k2p5");
|
||||
assert.equal(result, undefined);
|
||||
});
|
||||
|
||||
test("resolveModelId: bare MiniMax IDs prefer minimax over minimax-cn and aggregators", () => {
|
||||
|
|
|
|||
|
|
@ -443,7 +443,7 @@ describe("benchmark-selector", () => {
|
|||
aime_2026: 99,
|
||||
gpqa: 99,
|
||||
},
|
||||
k2p5: {
|
||||
"kimi-for-coding": {
|
||||
swe_bench: 1,
|
||||
live_code_bench: 1,
|
||||
human_eval: 1,
|
||||
|
|
@ -454,15 +454,15 @@ describe("benchmark-selector", () => {
|
|||
const r = selectByBenchmarks(
|
||||
"execute-task",
|
||||
[
|
||||
{ provider: "kimi-coding", id: "k2p5" },
|
||||
{ provider: "opencode-go", id: "kimi-k2.5" },
|
||||
{ provider: "openrouter", id: "moonshotai/kimi-k2.5" },
|
||||
],
|
||||
{ benchmarks: fixture },
|
||||
);
|
||||
assert.ok(r);
|
||||
assert.ok(
|
||||
r.scores["kimi-coding/k2p5"] > 90,
|
||||
"expected Kimi Code k2p5 route to score from kimi-k2.5",
|
||||
r.scores["opencode-go/kimi-k2.5"] > 90,
|
||||
"expected provider Kimi K2.5 route to score from kimi-k2.5",
|
||||
);
|
||||
assert.ok(
|
||||
r.scores["openrouter/moonshotai/kimi-k2.5"] > 90,
|
||||
|
|
|
|||
|
|
@ -25,12 +25,6 @@ test("model identity: Ollama Cloud live K2.6 id displays as Kimi K2.6", () => {
|
|||
);
|
||||
});
|
||||
|
||||
test("model identity: Kimi Code k2p5 wire route displays as K2.5", () => {
|
||||
const model = { provider: "kimi-coding", id: "k2p5", name: "Kimi K2.5" };
|
||||
assert.equal(normalizedModelName(model), "Kimi K2.5");
|
||||
assert.equal(formatModelIdentity(model), "Kimi K2.5 (kimi-coding/k2p5)");
|
||||
});
|
||||
|
||||
test("model identity: K2.5 remains distinct from K2.6", () => {
|
||||
const model = { provider: "opencode-go", id: "kimi-k2.5" };
|
||||
assert.equal(normalizedModelName(model), "Kimi K2.5");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue