preferences: add provider_preference for benchmark tie-breaking
When two models score identically in the benchmark selector — typically
the same underlying weights served by different endpoints — the
previous alphabetical tiebreaker picked the wrong one. dr-repo example:
zai/glm-5.1 score 84.7
opencode-go/glm-5.1 score 84.7
Both are the exact same GLM-5.1 weights. Alphabetical comparison made
opencode-go win ("o" < "z") even though zai is the NATIVE provider.
Fix: new `provider_preference` pref, an ordered list of providers.
Listed providers rank in list order; unlisted providers fall after them, alphabetically.
Applied as a tie-breaker after score and coverage, before the alphabetical fallback.
Global default shipped in ~/.sf/preferences.md:
kimi-coding, minimax, zai, mistral, ollama-cloud, opencode-go,
opencode
Native providers are ranked before re-servers. Users can override per
project.
Verified: after the change, dr-repo picks zai/glm-5.1 as primary for
execute-task and gate-evaluate (was opencode-go/glm-5.1), and
kimi-coding/k2p5 stays primary for completion phases with its direct
provider winning over opencode re-servers.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
345f9586dd
commit
e413cf4a3f
5 changed files with 53 additions and 3 deletions
|
|
@ -270,6 +270,13 @@ export interface SelectOptions {
|
|||
maxEntries?: number;
|
||||
/** Explicit benchmark data override (tests). */
|
||||
benchmarks?: BenchmarkData;
|
||||
/**
|
||||
* Ordered provider ranking for tie-breaking. When two models score
|
||||
* identically (typically same underlying weights served by different
|
||||
* endpoints), the provider earlier in this list wins. Providers not
|
||||
* listed rank after all listed providers, alphabetically.
|
||||
*/
|
||||
providerPreference?: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -286,16 +293,28 @@ export function selectByBenchmarks(
|
|||
const benchmarks = opts.benchmarks ?? loadBenchmarks();
|
||||
const maxEntries = opts.maxEntries ?? 4;
|
||||
|
||||
// Build a provider-rank map. Listed providers get their index; unlisted
|
||||
// fall after all listed ones. Case-insensitive.
|
||||
const providerRank = new Map<string, number>();
|
||||
const prefList = (opts.providerPreference ?? []).map(p => p.trim().toLowerCase());
|
||||
prefList.forEach((p, i) => { if (p && !providerRank.has(p)) providerRank.set(p, i); });
|
||||
const UNLISTED_RANK = 1_000_000;
|
||||
const rankOf = (prov: string) => providerRank.get(prov) ?? UNLISTED_RANK;
|
||||
|
||||
const ranked = candidates.map(c => {
|
||||
const { score, coverage } = scoreCandidate(c, weights, benchmarks);
|
||||
const fullId = `${c.provider}/${c.id}`;
|
||||
return { id: fullId, provider: c.provider.toLowerCase(), score, coverage };
|
||||
})
|
||||
// Stable sort: higher score first, then higher coverage as tiebreak,
|
||||
// then alphabetical for determinism.
|
||||
// Stable sort: higher score first, then higher coverage, then
|
||||
// provider_preference rank (lower = earlier = preferred), then
|
||||
// alphabetical for determinism.
|
||||
.sort((a, b) => {
|
||||
if (b.score !== a.score) return b.score - a.score;
|
||||
if (b.coverage !== a.coverage) return b.coverage - a.coverage;
|
||||
const ra = rankOf(a.provider);
|
||||
const rb = rankOf(b.provider);
|
||||
if (ra !== rb) return ra - rb;
|
||||
return a.id.localeCompare(b.id);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -71,7 +71,9 @@ function resolveAutoBenchmarkPickForUnit(
|
|||
}
|
||||
}
|
||||
if (candidates.length === 0) return undefined;
|
||||
const picked = selectByBenchmarks(unitType, candidates);
|
||||
const picked = selectByBenchmarks(unitType, candidates, {
|
||||
providerPreference: prefs?.provider_preference,
|
||||
});
|
||||
if (!picked) return undefined;
|
||||
return { primary: picked.primary, fallbacks: picked.fallbacks };
|
||||
} catch {
|
||||
|
|
|
|||
|
|
@ -101,6 +101,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set<string>([
|
|||
"github",
|
||||
"service_tier",
|
||||
"allowed_providers",
|
||||
"provider_preference",
|
||||
"forensics_dedup",
|
||||
"show_token_cost",
|
||||
"stale_commit_threshold_minutes",
|
||||
|
|
@ -440,6 +441,21 @@ export interface SFPreferences {
|
|||
* within it, and dynamic routing's `tier_models` stays inside the gate.
|
||||
*/
|
||||
allowed_providers?: string[];
|
||||
/**
|
||||
* Provider ranking for benchmark-selector tie-breaking. When two models
|
||||
* score identically (typically the same underlying weights served by
|
||||
* different endpoints, e.g. `zai/glm-5.1` vs `opencode-go/glm-5.1`),
|
||||
* the provider earlier in this list wins. Case-insensitive.
|
||||
*
|
||||
* Providers not in the list fall back to alphabetical order after all
|
||||
* ranked providers, so partial lists work — rank only the ones you care
|
||||
* about. Typical use: put direct/native providers first, re-servers
|
||||
* (opencode, opencode-go, openrouter) later.
|
||||
*
|
||||
* Example:
|
||||
* provider_preference: [kimi-coding, minimax, zai, mistral, opencode-go, opencode]
|
||||
*/
|
||||
provider_preference?: string[];
|
||||
}
|
||||
|
||||
export interface LoadedSFPreferences {
|
||||
|
|
|
|||
|
|
@ -392,6 +392,18 @@ export function validatePreferences(preferences: SFPreferences): {
|
|||
}
|
||||
}
|
||||
|
||||
// ─── Provider Preference (benchmark tie-break order) ────────────────
|
||||
if (preferences.provider_preference !== undefined) {
|
||||
if (Array.isArray(preferences.provider_preference) && preferences.provider_preference.every(s => typeof s === "string")) {
|
||||
const cleaned = preferences.provider_preference
|
||||
.map((s: string) => s.trim().toLowerCase())
|
||||
.filter((s: string) => s.length > 0);
|
||||
if (cleaned.length > 0) validated.provider_preference = cleaned;
|
||||
} else {
|
||||
errors.push("provider_preference must be an array of provider-ID strings");
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Allowed Providers (hard allowlist) ─────────────────────────────
|
||||
// When set, model selection is gated to these providers only — any
|
||||
// model from any other provider is filtered out of the candidate set
|
||||
|
|
|
|||
|
|
@ -507,6 +507,7 @@ function mergePreferences(base: SFPreferences, override: SFPreferences): SFPrefe
|
|||
// of latent bug as service_tier (fixed separately). Each gets a simple
|
||||
// override-wins merge so the preference actually reaches consumers.
|
||||
allowed_providers: mergeStringLists(base.allowed_providers, override.allowed_providers),
|
||||
provider_preference: override.provider_preference ?? base.provider_preference,
|
||||
flat_rate_providers: mergeStringLists(base.flat_rate_providers, override.flat_rate_providers),
|
||||
stale_commit_threshold_minutes: override.stale_commit_threshold_minutes ?? base.stale_commit_threshold_minutes,
|
||||
widget_mode: override.widget_mode ?? base.widget_mode,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue