From 1d753af6b6b066783e9b93c9832f9d71c31bb6cf Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Thu, 14 May 2026 02:57:27 +0200 Subject: [PATCH] docs(dev): draft model registry contract for upcoming refactor Spec for consolidating the three alias tables (benchmark-selector, auto-model-selection, model-router) into a single SF-extension registry that reads from @singularity-forge/ai's MODELS and enriches it with canonical_id, generation, and tier. Shared interface for parallel Swarm A/B/C work. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/dev/drafts/model-registry-contract.md | 328 +++++++++++++++++++++ 1 file changed, 328 insertions(+) create mode 100644 docs/dev/drafts/model-registry-contract.md diff --git a/docs/dev/drafts/model-registry-contract.md b/docs/dev/drafts/model-registry-contract.md new file mode 100644 index 000000000..a9d49bb7c --- /dev/null +++ b/docs/dev/drafts/model-registry-contract.md @@ -0,0 +1,328 @@ +# Model Registry Contract (SF Extension Layer) + +**Status:** draft / WIP swarm contract +**Audience:** Swarm A (registry + alias migration), Swarm B (failover guard), Swarm C (metrics migration) + +## Background + +The upstream package `@singularity-forge/ai` (`node_modules/@singularity-forge/ai/dist/models.generated.js`) exports a `MODELS` constant — a `{ provider: { wire_id: ModelEntry } }` map. ~26 providers, 100+ entries. This is the single source of truth for **route-level** model data: + +```ts +type ModelEntry = { + id: string; // wire model id used on that provider (e.g. "moonshotai/kimi-k2.5") + name: string; // human display name + api: "anthropic-messages" | "openai-completions" + | "openai-responses" | "bedrock-converse-stream" + | "google-generative" | ...; // wire format + provider: string; // route identifier (e.g. "kimi-coding") + baseUrl: string; // route base url + reasoning?: boolean; + input?: ("text"|"image"|...)[]; + capabilities?: { thinkingNoBudget?: boolean, ... 
}; + cost?: { input, output, cacheRead, cacheWrite }; + contextWindow?: number; + maxTokens?: number; +}; +``` + +The SF extension currently maintains **three duplicate alias tables** that encode some of this knowledge separately, plus two pieces of knowledge the upstream registry does NOT have: + +1. **Canonical model identity** — mapping `(provider, wire_id)` → a stable, generation-aware canonical id. E.g. all routes for Kimi K2.5 (`moonshotai/Kimi-K2.5`, `moonshotai/kimi-k2.5`, `kimi-k2.5`, `kimi-k2.5:cloud`) collapse to canonical id `kimi-k2.5`. + +2. **Capability tier** — `light` / `standard` / `heavy`, used by `model-router.js` for tier-aware routing. + +This contract defines the SF-extension module that consolidates all three duplicate tables, enriches the upstream registry with the two missing dimensions, and exposes a single API the extension consumes. + +## Module: `src/resources/extensions/sf/model-registry.ts` + +### Exports + +```ts +export type WireFormat = + | "anthropic-messages" + | "openai-completions" + | "openai-responses" + | "bedrock-converse-stream" + | "google-generative" + | string; // open enum — pass through unknown values from upstream + +export type CapabilityTier = "light" | "standard" | "heavy"; + +export type CanonicalId = string; +// Stable, generation-aware identity. Examples: +// "kimi-k2.5" (NOT the same as kimi-k2.6 — generation matters) +// "kimi-k2.6" +// "kimi-k2-thinking" +// "claude-sonnet-4-6" +// "MiniMax-M2.7" + +export type RouteKey = string; +// Format: `${provider}/${wire_id}`. wire_id may itself contain "/", so split on the FIRST "/" only. Examples: +// "kimi-coding/kimi-k2.6" +// "openrouter/moonshotai/kimi-k2.5" +// "amazon-bedrock/moonshotai.kimi-k2.5" + +export interface ResolvedModel { + canonical_id: CanonicalId; + generation: string; // free-form, e.g. 
"k2.5", "k2.6", "sonnet-4" + tier: CapabilityTier; + // Pass-through from upstream ModelEntry: + wire_id: string; // the upstream entry's `id` + provider: string; + api: WireFormat; // wire format axis + baseUrl: string; + capabilities?: Record<string, unknown>; + cost?: { input?: number; output?: number; cacheRead?: number; cacheWrite?: number }; + contextWindow?: number; + maxTokens?: number; + reasoning?: boolean; + inputModalities?: string[]; // renamed from upstream `input` for clarity +} + +/** Look up a (provider, wire_id) pair. Returns null if not in upstream. */ +export function lookup(provider: string, wireId: string): ResolvedModel | null; + +/** Same, parsed from a fused route key. */ +export function lookupRoute(routeKey: RouteKey): ResolvedModel | null; + +/** All routes (across all providers) that resolve to this canonical id. */ +export function routesFor(canonicalId: CanonicalId): ResolvedModel[]; + +/** Map a route key to a canonical id, or null if unmappable. */ +export function canonicalIdFor(routeKey: RouteKey): CanonicalId | null; + +/** Capability tier of a canonical id. */ +export function tierFor(canonicalId: CanonicalId): CapabilityTier | null; + +/** Generation of a canonical id (e.g. "k2.5"). */ +export function generationFor(canonicalId: CanonicalId): string | null; + +/** True when two canonical ids share a generation (failover between them is not a downgrade). */ +export function sameGeneration(a: CanonicalId, b: CanonicalId): boolean; + +/** List every canonical id known to SF. */ +export function allCanonicalIds(): CanonicalId[]; + +/** Build a route key from a resolved model (for metrics aggregation). */ +export function routeKeyOf(m: { provider: string; wire_id: string }): RouteKey; +``` + +### Internal data (the only hand-maintained tables) + +```ts +// (provider, wire_id) → canonical id +// Only routes whose canonical id DIVERGES from the bare wire_id need a mapping. 
+// Entries that are already canonical (provider="kimi-coding", wire_id="kimi-k2.6") +// can be omitted; the resolver falls back to wire_id when no mapping exists. +const CANONICAL_BY_ROUTE: Record<RouteKey, CanonicalId> = { + "amazon-bedrock/moonshotai.kimi-k2.5": "kimi-k2.5", + "amazon-bedrock/moonshot.kimi-k2-thinking": "kimi-k2-thinking", + "groq/moonshotai/kimi-k2-instruct": "kimi-k2", + "groq/moonshotai/kimi-k2-instruct-0905": "kimi-k2-0905", + "huggingface/moonshotai/Kimi-K2-Instruct": "kimi-k2", + "huggingface/moonshotai/Kimi-K2-Instruct-0905": "kimi-k2-0905", + "huggingface/moonshotai/Kimi-K2-Thinking": "kimi-k2-thinking", + "huggingface/moonshotai/Kimi-K2.5": "kimi-k2.5", + "openrouter/moonshotai/kimi-k2": "kimi-k2", + "openrouter/moonshotai/kimi-k2-0905": "kimi-k2-0905", + "openrouter/moonshotai/kimi-k2-thinking": "kimi-k2-thinking", + "openrouter/moonshotai/kimi-k2.5": "kimi-k2.5", + "vercel-ai-gateway/moonshotai/kimi-k2": "kimi-k2", + "vercel-ai-gateway/moonshotai/kimi-k2-0905": "kimi-k2-0905", + "vercel-ai-gateway/moonshotai/kimi-k2-thinking": "kimi-k2-thinking", + "vercel-ai-gateway/moonshotai/kimi-k2-thinking-turbo": "kimi-k2-thinking-turbo", + "vercel-ai-gateway/moonshotai/kimi-k2-turbo": "kimi-k2-turbo", + "vercel-ai-gateway/moonshotai/kimi-k2.5": "kimi-k2.5", + "opencode/kimi-k2.5": "kimi-k2.5", + "opencode-go/kimi-k2.5": "kimi-k2.5", + "kimi-coding/kimi-k2.6": "kimi-k2.6", + "kimi-coding/kimi-k2-thinking": "kimi-k2-thinking", + // ... (full list: Swarm A populates by enumerating upstream MODELS during build) +}; + +// canonical id → generation tag +// Same-generation routes are eligible for direct failover (no downgrade signal). 
+const GENERATION: Record<CanonicalId, string> = { + "kimi-k2": "k2", + "kimi-k2-0905": "k2", // same generation, post-release patch + "kimi-k2.5": "k2.5", + "kimi-k2.6": "k2.6", + "kimi-k2-thinking": "k2-thinking", + "kimi-k2-thinking-turbo": "k2-thinking", + "kimi-k2-turbo": "k2", + "claude-haiku-4-5": "haiku-4", + "claude-sonnet-4-5": "sonnet-4", + "claude-sonnet-4-6": "sonnet-4", + "claude-opus-4-5": "opus-4", + "claude-opus-4-7": "opus-4", + // ... extracted from current MODEL_CAPABILITY_TIER + model-router knowledge +}; + +// canonical id → tier +// Lifted directly from MODEL_CAPABILITY_TIER in model-router.js, with K2.5 → K2.6 +// alias REMOVED (latent bug — each generation now keeps its own tier entry). +const TIER: Record<CanonicalId, CapabilityTier> = { + "claude-haiku-4-5": "light", + "claude-sonnet-4-6": "standard", + "claude-opus-4-7": "heavy", + "kimi-k2.5": "standard", + "kimi-k2.6": "standard", + // ... +}; +``` + +### Resolution algorithm + +``` +lookup(provider, wireId): + upstream = MODELS[provider]?.[wireId] + if !upstream: return null + routeKey = `${provider}/${wireId}` + canonical = CANONICAL_BY_ROUTE[routeKey] ?? wireId + return { + canonical_id: canonical, + generation: GENERATION[canonical] ?? canonical, + tier: TIER[canonical] ?? "standard", + wire_id: upstream.id, + provider: upstream.provider, + api: upstream.api, + baseUrl: upstream.baseUrl, + capabilities: upstream.capabilities, + cost: upstream.cost, + contextWindow: upstream.contextWindow, + maxTokens: upstream.maxTokens, + reasoning: upstream.reasoning, + inputModalities: upstream.input, + } +``` + +## Consumer migrations + +### `benchmark-selector.js` (Swarm A) + +**Remove** `BENCHMARK_KEY_ALIASES` (lines ~271-283). **Replace** every alias lookup with: + +```js +import { canonicalIdFor, lookup } from "./model-registry.js"; + +// Old: +// const semantic = BENCHMARK_KEY_ALIASES[wireId] ?? wireId; +// New: +const resolved = lookup(provider, wireId); +const semantic = resolved?.canonical_id ?? 
wireId; +``` + +### `auto-model-selection.js` (Swarm A) + +**Remove** `preferredBareModelIds()` (lines ~127-151). **Replace** with: + +```js +import { routesFor } from "./model-registry.js"; + +// Old: +// const ids = preferredBareModelIds(canonical); +// New: +const ids = routesFor(canonical).map(r => r.wire_id); +``` + +### `model-router.js` (Swarm A) + +**Remove** the capability-tier alias block (lines ~963-967, including the buggy `"kimi-k2.5": "kimi-k2.6"` downgrade alias). **Replace** the tier lookup with: + +```js +import { tierFor, canonicalIdFor } from "./model-registry.js"; + +// Old: +// const tier = MODEL_CAPABILITY_TIER[modelId] ?? "standard"; +// New: +const canonical = canonicalIdFor(routeKey) ?? modelId; +const tier = tierFor(canonical) ?? "standard"; +``` + +`MODEL_CAPABILITY_TIER` itself becomes the seed data for the registry's `TIER` table — Swarm A migrates the current table entries into the registry, then deletes the local table. + +### `model-route-failure.js` (Swarm B) + +**Patch** `resolveNextAvailableModelRoute()` (lines ~84-111) to honor canonical id and generation: + +```js +import { canonicalIdFor, sameGeneration } from "./model-registry.js"; + +function resolveNextAvailableModelRoute(failedRoutes, currentRoute, unitType) { + const currentCanonical = canonicalIdFor(currentRoute); + const isSolverPinned = unitType === "solve" /* or whatever ADR-0079's exact name */; + + for (const candidate of candidateRoutes) { + if (failedRoutes.includes(candidate)) continue; + const candidateCanonical = canonicalIdFor(candidate); + + // Solver pin: ADR-0079. Never cross canonical_id boundary when solving. + if (isSolverPinned && candidateCanonical !== currentCanonical) continue; + + // Generation guard: even for non-solver, log a generation downgrade explicitly. 
+ if (!sameGeneration(currentCanonical, candidateCanonical)) { + logGenerationDowngrade(currentCanonical, candidateCanonical, unitType); + } + return candidate; + } + return null; +} +``` + +`logGenerationDowngrade` writes a structured log event so it's visible in traces / observability. Because the loop returns the first non-failed candidate, `candidateRoutes` must be ordered with same-canonical routes first, so a cross-generation candidate is only returned once no same-canonical route is left. + +### `model-learner.js` + `metrics.js` (Swarm C) + +**Refactor** `model-performance.json` schema from fused `{ "kimi-coding/kimi-k2.6": {...} }` to canonical-keyed with `by_route` breakdown: + +```json +{ + "execute-task": { + "kimi-k2.6": { + "aggregate": { "successes": 5, "failures": 0, "timeouts": 0, ... }, + "by_route": { + "kimi-coding/kimi-k2.6": { "successes": 5, "failures": 0, ... } + } + }, + "_unmapped": { + // any routeKey that doesn't resolve via canonicalIdFor() lands here, + // so we don't silently drop data during the migration + "by_route": { "foo-provider/bar-model": { "successes": 1, ... } } + } + } +} +``` + +**Migration:** when `model-learner.js` boots and reads the old-schema file, it should: + +1. Detect old format (top-level keys are `provider/wireId` not canonical ids). +2. Distribute each entry into `<canonical_id>.by_route[<routeKey>]` (or `_unmapped.by_route[<routeKey>]` when `canonicalIdFor()` returns null). +3. Recompute `<canonical_id>.aggregate` as the sum of its `by_route` entries. +4. Write the new format back. Keep a single backup at `.sf/model-performance.json.pre-canonical-backup`. + +**Reads:** + +- Auto-selection scoring should query `aggregate` for cross-route model strength. +- Per-route health (for failover ordering) should query `by_route[routeKey]`. 
+ +## Test expectations + +**Swarm A:** for every entry currently in `BENCHMARK_KEY_ALIASES`, `preferredBareModelIds`, and `MODEL_CAPABILITY_TIER`, prove pre/post equivalence via fixture tests. **Specifically test that the K2.5 → K2.6 tier alias is gone and K2.5 now resolves to its own tier entry.** + +**Swarm B:** unit tests for: +- Solver pinned to `kimi-k2.6` cannot fail over to `kimi-k2.5` (different canonical_id). 
+- Same-canonical multi-route failover works (kimi-coding/kimi-k2.6 → some other route for kimi-k2.6 if one existed). +- Cross-generation downgrade (when no same-canonical route is left) emits `logGenerationDowngrade`. + +**Swarm C:** unit tests for: +- Migration round-trip (load old, write new, reload, by-route entries unchanged). +- `aggregate.successes == sum(by_route[*].successes)`. +- Unmappable route keys end up in `_unmapped`, not dropped. +- Reading a never-seen canonical_id returns sensible defaults. + +## Integration order + +1. Swarm A lands first (creates `model-registry.ts`, migrates 3 callsites, removes duplicate tables). +2. Swarm B + Swarm C run in parallel after A merges (both consume A's `model-registry.ts`). +3. `npm run copy-resources` after each merge. +4. `npm run test:unit` clean.