preferences: add provider_preference for benchmark tie-breaking

When two models score identically in the benchmark selector — typically
the same underlying weights served by different endpoints — the
previous alphabetical tiebreaker picked the wrong one. dr-repo example:

  zai/glm-5.1       score 84.7
  opencode-go/glm-5.1 score 84.7

Both are the exact same GLM-5.1 weights. Alphabetical comparison made
opencode-go win ("o" < "z") even though zai is the NATIVE provider.

Fix: new `provider_preference` pref, an ordered list of providers.
Listed providers rank in list order; unlisted ones fall after them,
alphabetically. Applied as a tie-breaker after score and coverage,
before the final alphabetical comparison.

Global default shipped in ~/.sf/preferences.md:
  kimi-coding, minimax, zai, mistral, ollama-cloud, opencode-go,
  opencode

Native providers ranked before re-servers. Users can override per
project.

Verified: after the change, dr-repo picks zai/glm-5.1 as primary for
execute-task and gate-evaluate (was opencode-go/glm-5.1), and
kimi-coding/k2p5 stays primary for completion phases with its direct
provider winning over opencode re-servers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-04-19 10:09:42 +02:00
parent 345f9586dd
commit e413cf4a3f
5 changed files with 53 additions and 3 deletions

View file

@ -270,6 +270,13 @@ export interface SelectOptions {
maxEntries?: number;
/** Explicit benchmark data override (tests). */
benchmarks?: BenchmarkData;
/**
* Ordered provider ranking for tie-breaking. When two models score
* identically (typically same underlying weights served by different
* endpoints), the provider earlier in this list wins. Providers not
* listed rank after all listed providers, alphabetically.
*/
providerPreference?: string[];
}
/**
@ -286,16 +293,28 @@ export function selectByBenchmarks(
const benchmarks = opts.benchmarks ?? loadBenchmarks();
const maxEntries = opts.maxEntries ?? 4;
// Build a provider-rank map. Listed providers get their index; unlisted
// fall after all listed ones. Case-insensitive.
const providerRank = new Map<string, number>();
const prefList = (opts.providerPreference ?? []).map(p => p.trim().toLowerCase());
prefList.forEach((p, i) => { if (p && !providerRank.has(p)) providerRank.set(p, i); });
const UNLISTED_RANK = 1_000_000;
const rankOf = (prov: string) => providerRank.get(prov) ?? UNLISTED_RANK;
const ranked = candidates.map(c => {
const { score, coverage } = scoreCandidate(c, weights, benchmarks);
const fullId = `${c.provider}/${c.id}`;
return { id: fullId, provider: c.provider.toLowerCase(), score, coverage };
})
// Stable sort: higher score first, then higher coverage as tiebreak,
// then alphabetical for determinism.
// Stable sort: higher score first, then higher coverage, then
// provider_preference rank (lower = earlier = preferred), then
// alphabetical for determinism.
.sort((a, b) => {
if (b.score !== a.score) return b.score - a.score;
if (b.coverage !== a.coverage) return b.coverage - a.coverage;
const ra = rankOf(a.provider);
const rb = rankOf(b.provider);
if (ra !== rb) return ra - rb;
return a.id.localeCompare(b.id);
});

View file

@ -71,7 +71,9 @@ function resolveAutoBenchmarkPickForUnit(
}
}
if (candidates.length === 0) return undefined;
const picked = selectByBenchmarks(unitType, candidates);
const picked = selectByBenchmarks(unitType, candidates, {
providerPreference: prefs?.provider_preference,
});
if (!picked) return undefined;
return { primary: picked.primary, fallbacks: picked.fallbacks };
} catch {

View file

@ -101,6 +101,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set<string>([
"github",
"service_tier",
"allowed_providers",
"provider_preference",
"forensics_dedup",
"show_token_cost",
"stale_commit_threshold_minutes",
@ -440,6 +441,21 @@ export interface SFPreferences {
* within it, and dynamic routing's `tier_models` stays inside the gate.
*/
allowed_providers?: string[];
/**
* Provider ranking for benchmark-selector tie-breaking. When two models
* score identically (typically the same underlying weights served by
* different endpoints, e.g. `zai/glm-5.1` vs `opencode-go/glm-5.1`),
* the provider earlier in this list wins. Case-insensitive.
*
* Providers not in the list fall back to alphabetical order after all
* ranked providers, so partial lists work — rank only the ones you care
* about. Typical use: put direct/native providers first, re-servers
* (opencode, opencode-go, openrouter) later.
*
* Example:
* provider_preference: [kimi-coding, minimax, zai, mistral, opencode-go, opencode]
*/
provider_preference?: string[];
}
export interface LoadedSFPreferences {

View file

@ -392,6 +392,18 @@ export function validatePreferences(preferences: SFPreferences): {
}
}
// ─── Provider Preference (benchmark tie-break order) ────────────────
if (preferences.provider_preference !== undefined) {
if (Array.isArray(preferences.provider_preference) && preferences.provider_preference.every(s => typeof s === "string")) {
const cleaned = preferences.provider_preference
.map((s: string) => s.trim().toLowerCase())
.filter((s: string) => s.length > 0);
if (cleaned.length > 0) validated.provider_preference = cleaned;
} else {
errors.push("provider_preference must be an array of provider-ID strings");
}
}
// ─── Allowed Providers (hard allowlist) ─────────────────────────────
// When set, model selection is gated to these providers only — any
// model from any other provider is filtered out of the candidate set

View file

@ -507,6 +507,7 @@ function mergePreferences(base: SFPreferences, override: SFPreferences): SFPrefe
// of latent bug as service_tier (fixed separately). Each gets a simple
// override-wins merge so the preference actually reaches consumers.
allowed_providers: mergeStringLists(base.allowed_providers, override.allowed_providers),
provider_preference: override.provider_preference ?? base.provider_preference,
flat_rate_providers: mergeStringLists(base.flat_rate_providers, override.flat_rate_providers),
stale_commit_threshold_minutes: override.stale_commit_threshold_minutes ?? base.stale_commit_threshold_minutes,
widget_mode: override.widget_mode ?? base.widget_mode,