diff --git a/src/resources/extensions/sf/model-router.js b/src/resources/extensions/sf/model-router.js index 6ed7a720c..89ce9db80 100644 --- a/src/resources/extensions/sf/model-router.js +++ b/src/resources/extensions/sf/model-router.js @@ -1080,6 +1080,18 @@ export const BASE_REQUIREMENTS = { "run-uat": { instruction: 0.7, speed: 0.8, agentic: 0.6 }, "discuss-milestone": { reasoning: 0.6, instruction: 0.7, agentic: 0.4 }, "complete-milestone": { instruction: 0.8, reasoning: 0.5, agentic: 0.5 }, + // Self-feedback triage is an agentic task: read open entries, reason + // about decisions (fix/promote/close), optionally call resolve_issue. + // Higher agentic weight than execute-task because triage decisions + // involve significant judgment + structured tool calls; lower coding + // weight because the model isn't writing implementation code in this + // dispatch. + "self-feedback-triage": { + coding: 0.4, + instruction: 0.8, + reasoning: 0.8, + agentic: 0.9, + }, }; // ─── Public API ────────────────────────────────────────────────────────────── /** diff --git a/src/resources/extensions/sf/self-feedback-drain.js b/src/resources/extensions/sf/self-feedback-drain.js index 8e589e531..8a24c96c0 100644 --- a/src/resources/extensions/sf/self-feedback-drain.js +++ b/src/resources/extensions/sf/self-feedback-drain.js @@ -338,16 +338,26 @@ export function dispatchSelfFeedbackInlineFixIfNeeded(basePath, ctx, pi) { * * Consumer: register-hooks.ts turn_end handler. */ -/** - * Default provider/model used by runTriage when --model is not supplied. - * Matches DEFAULT_REFLECTION_MODEL — both reasoning passes benefit from the - * same pro tier and route through the operator's persistent gemini-cli - * session by default. - */ -const DEFAULT_TRIAGE_MODEL = "google-gemini-cli/gemini-3-pro-preview"; - const TRIAGE_TERMINATOR = "Self-feedback triage complete"; +/** + * Last-resort candidates if reading the operator's settings.json fails. 
The + * Chinese-provider set matches typical SF operator config (kimi + minimax + + * zai). Gemini is intentionally NOT in this list — when the operator has + * gemini as default they'll have it in settings.json and we'll pick it up + * from there; hardcoding it here would silently re-default to gemini even + * for operators who explicitly removed it. The router scores these via + * BASE_REQUIREMENTS["self-feedback-triage"] before dispatch. + * + * Operator --model override always wins; operator settings.json enabledModels + * always wins over this fallback list. + */ +const TRIAGE_FALLBACK_CANDIDATES = [ + "kimi-coding/kimi-for-coding", + "minimax/MiniMax-M2.7", + "zai/glm-5", +]; + function parseTriageModelString(input) { if (typeof input !== "string") return null; const slash = input.indexOf("/"); @@ -368,6 +378,110 @@ async function resolveTriageModel(providerModelString) { } } +/** + * Read the operator's enabledModels allowlist from ~/.sf/agent/settings.json + * and expand "provider/*" wildcards against @singularity-forge/ai's MODELS + * catalog. Always also include defaultProvider/defaultModel from the same + * settings so the operator's chosen default is never silently dropped. + * + * Returns null on any failure — callers fall back to TRIAGE_FALLBACK_CANDIDATES. + */ +async function readOperatorTriageCandidates() { + try { + const { getSfAgentSettingsPath } = await import("./preferences.js"); + const path = getSfAgentSettingsPath(); + if (!existsSync(path)) return null; + const settings = JSON.parse(readFileSync(path, "utf-8")); + const enabled = Array.isArray(settings?.enabledModels) + ? settings.enabledModels + : []; + const defaultProvider = settings?.defaultProvider; + const defaultModel = settings?.defaultModel; + + const result = new Set(); + if (defaultProvider && defaultModel) { + result.add(`${defaultProvider}/${defaultModel}`); + } + + // Expand wildcards by walking the pi-ai MODELS catalog for matching + // provider entries. 
Exact "provider/modelId" entries pass through.
+    const ai = await import("@singularity-forge/ai");
+    for (const entry of enabled) {
+      if (typeof entry !== "string" || !entry.includes("/")) continue;
+      const slash = entry.indexOf("/");
+      const provider = entry.slice(0, slash);
+      const modelGlob = entry.slice(slash + 1);
+      if (modelGlob !== "*") {
+        result.add(entry);
+        continue;
+      }
+      // Wildcard: enumerate models registered under this provider that
+      // have an agentic profile we can score. getModelsByProvider is the
+      // canonical pi-ai accessor; if it isn't available we just skip.
+      if (typeof ai.getModelsByProvider === "function") {
+        const models = ai.getModelsByProvider(provider) ?? [];
+        for (const m of models) {
+          if (m?.id) result.add(`${provider}/${m.id}`);
+        }
+      }
+    }
+
+    const list = Array.from(result);
+    return list.length > 0 ? list : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Score the operator-derived candidate list via SF's model-router using the
+ * self-feedback-triage requirements profile (agentic-heavy). Returns the
+ * "provider/modelId" strings in best-first order. Used by runTriage for
+ * dispatch-with-fallback when the top pick lacks credentials.
+ *
+ * Priority order (sf-mp5khix3-9beona AC1 + user correction):
+ *   1. explicit candidates passed by caller (test injection)
+ *   2. operator's settings.json enabledModels (expanded) + defaultModel
+ *   3. TRIAGE_FALLBACK_CANDIDATES — only when settings.json is missing or
+ *      yields no usable models. This is the safe Chinese-provider set that
+ *      matches the operator's typical config, NOT a universal model dump.
+ *
+ * Consumer: runTriage (when operator doesn't pass --model).
+ */
+async function rankTriageModelsViaRouter(candidates) {
+  const candidateList =
+    (Array.isArray(candidates) && candidates.length > 0
+      ? candidates
+      : await readOperatorTriageCandidates()) ?? 
TRIAGE_FALLBACK_CANDIDATES;
+  try {
+    const { BASE_REQUIREMENTS, scoreEligibleModels } = await import(
+      "./model-router.js"
+    );
+    const ranked = scoreEligibleModels(
+      candidateList,
+      BASE_REQUIREMENTS["self-feedback-triage"],
+    );
+    const ids = ranked.map((r) => r.modelId).filter(Boolean);
+    return ids.length > 0 ? ids : candidateList;
+  } catch {
+    return candidateList;
+  }
+}
+
+/**
+ * Heuristic: detect provider-credential failures that warrant trying the
+ * next-best candidate instead of surfacing the error. We match on
+ * substrings that pi-ai providers actually emit on missing-key paths.
+ */
+function isCredentialError(message) {
+  if (typeof message !== "string") return false;
+  return (
+    /no api key|missing api key|api[_ ]?key.*not set|unauthorized|authentication failed|not authenticated|credentials/i.test(
+      message,
+    )
+  );
+}
+
 function extractAssistantText(message) {
   const content = message?.content;
   if (!Array.isArray(content)) return "";
@@ -407,12 +521,21 @@ export function writeTriageDecisionReport(basePath, content) {
 /**
  * Run a triage pass against the canonical inline-fix prompt.
  *
- * Provider-agnostic: routes through @singularity-forge/ai's completeSimple,
- * mirroring runReflection. Captures the model's decision text — the actual
- * fix/promote/close actions are operator-applied (or, in a follow-up, a
- * tool-enabled variant will let the model call resolve_issue directly).
+ * Model selection (sf-mp5khix3-9beona AC1): when options.model is supplied
+ * the operator's choice wins. Otherwise, route through SF's model-router
+ * using BASE_REQUIREMENTS["self-feedback-triage"] (agentic-heavy) over the
+ * router-ranked candidate list. Falls back to TRIAGE_FALLBACK_CANDIDATES if
+ * scoring fails. No more hardcoded gemini default.
- *
- * options.model — "provider/modelId" string. Defaults to DEFAULT_TRIAGE_MODEL.
+ * Provider-agnostic dispatch: routes through @singularity-forge/ai's
+ * completeSimple, mirroring runReflection. 
Today's path captures decision + * text only — the actual fix/promote/close actions are operator-applied. + * A tool-enabled --apply variant (where the model calls resolve_issue + * directly) lands in a follow-up (sf-mp5khix3-9beona AC2). + * + * options.model — "provider/modelId" override. Defaults to router pick. + * options.candidates — override the candidate list for router-based pick + * (mostly useful in tests). * options.complete — dependency injection for tests; same shape as the * reflection runner. * options.timeoutMs — defaults to 8 minutes. @@ -423,24 +546,21 @@ export function writeTriageDecisionReport(basePath, content) { * Consumer: headless-triage operator surface (--run flag). */ export async function runTriage(prompt, options = {}) { - const modelString = options.model ?? DEFAULT_TRIAGE_MODEL; const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000; - let model; - try { - model = await resolveTriageModel(modelString); - } catch (err) { - return { - ok: false, - error: `failed to load model catalog: ${getErrorMessage(err)}`, - }; - } - if (!model) { - return { - ok: false, - error: `unknown model "${modelString}" — expected "provider/modelId" with a model registered in @singularity-forge/ai MODELS`, - }; - } + // Build the dispatch order. Operator --model wins as a single-shot + // pick (no fallback). Without --model, walk the router-ranked list and + // fall through to the next candidate on credential errors. + const dispatchOrder = options.model + ? [options.model] + : await rankTriageModelsViaRouter(options.candidates); + + const completeFn = + options.complete ?? + (await (async () => { + const ai = await import("@singularity-forge/ai"); + return ai.completeSimple; + })()); const context = { systemPrompt: undefined, @@ -453,46 +573,84 @@ export async function runTriage(prompt, options = {}) { ], }; - const completeFn = - options.complete ?? 
- (await (async () => { - const ai = await import("@singularity-forge/ai"); - return ai.completeSimple; - })()); - - const callPromise = (async () => { + const attemptOne = async (modelString) => { + let model; try { - const message = await completeFn(model, context, {}); - const content = extractAssistantText(message); - return { - ok: true, - content, - cleanFinish: content.includes(TRIAGE_TERMINATOR), - provider: model.provider, - modelId: model.id, - }; + model = await resolveTriageModel(modelString); } catch (err) { return { ok: false, - error: `provider call failed: ${getErrorMessage(err)}`, - provider: model.provider, - modelId: model.id, + error: `failed to load model catalog: ${getErrorMessage(err)}`, + modelString, }; } - })(); - - const timeoutPromise = new Promise((resolve) => { - setTimeout(() => { - resolve({ + if (!model) { + return { ok: false, - error: `triage call timed out after ${timeoutMs}ms`, - provider: model.provider, - modelId: model.id, - }); - }, timeoutMs); - }); + error: `unknown model "${modelString}" — expected "provider/modelId" with a model registered in @singularity-forge/ai MODELS`, + modelString, + }; + } - return Promise.race([callPromise, timeoutPromise]); + const callPromise = (async () => { + try { + const message = await completeFn(model, context, {}); + const content = extractAssistantText(message); + return { + ok: true, + content, + cleanFinish: content.includes(TRIAGE_TERMINATOR), + provider: model.provider, + modelId: model.id, + }; + } catch (err) { + return { + ok: false, + error: `provider call failed: ${getErrorMessage(err)}`, + provider: model.provider, + modelId: model.id, + }; + } + })(); + + const timeoutPromise = new Promise((resolve) => { + setTimeout(() => { + resolve({ + ok: false, + error: `triage call timed out after ${timeoutMs}ms`, + provider: model.provider, + modelId: model.id, + }); + }, timeoutMs); + }); + + return Promise.race([callPromise, timeoutPromise]); + }; + + // Walk the ranked list. 
Retry-on-credential-error keeps us from giving + // up because the highest-scoring model happens to lack credentials in + // the operator's environment (e.g. router picks openai-codex-responses + // when the operator only has anthropic + gemini-cli auth). + const attempts = []; + for (const modelString of dispatchOrder) { + const result = await attemptOne(modelString); + attempts.push({ modelString, error: result.error }); + if (result.ok) return result; + // Only fall through on credential errors. Unknown-model + timeout + + // other errors surface as-is (no point retrying with a different + // model if the prompt itself is broken or the network is down). + if (!isCredentialError(result.error) || dispatchOrder.length <= 1) { + return result; + } + } + + // All candidates exhausted on credential errors. + const lastErr = attempts[attempts.length - 1]; + return { + ok: false, + error: `provider call failed: no candidate model had usable credentials (tried ${attempts.length}: ${attempts.map((a) => a.modelString).join(", ")}). Last error: ${lastErr?.error ?? 
"unknown"}`, + modelId: lastErr?.modelString, + }; } export function consumeCompletedInlineFixClaim(basePath) { diff --git a/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs b/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs index 35d24c7a5..ea68276d6 100644 --- a/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs +++ b/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs @@ -328,6 +328,54 @@ describe("runTriage (dependency-injected)", () => { }); }); +describe("runTriage model routing (sf-mp5khix3-9beona AC1)", () => { + test("router scores candidates by agentic profile and picks the best", async () => { + // Test fixture uses the operator's actual provider allowlist + // (minimax + kimi-coding) instead of cross-provider examples: + // - MiniMax-M2.1 has agentic 40 (older gen, penalized after + // the 60+ checkpoint loop incident on 2026-05-13) + // - kimi-for-coding aliases to kimi-k2.6 capability profile, + // agentic 90 (pinned autonomous-solver default per ADR-0079) + // With agentic-heavy requirements the router must pick kimi. 
+ const seen = { modelId: null }; + await runTriage("prompt", { + candidates: [ + "minimax/MiniMax-M2.1", + "kimi-coding/kimi-for-coding", + ], + complete: async (model) => { + seen.modelId = model.id; + return { content: [{ type: "text", text: "ok" }] }; + }, + }); + expect(seen.modelId).toBe("kimi-for-coding"); + }); + + test("operator --model still wins over router pick", async () => { + const seen = { modelId: null }; + await runTriage("prompt", { + model: "anthropic/claude-sonnet-4-6", + complete: async (model) => { + seen.modelId = model.id; + return { content: [{ type: "text", text: "ok" }] }; + }, + }); + expect(seen.modelId).toBe("claude-sonnet-4-6"); + }); + + test("candidates override picks from a custom list", async () => { + const seen = { modelId: null }; + await runTriage("prompt", { + candidates: ["anthropic/claude-opus-4-6"], + complete: async (model) => { + seen.modelId = model.id; + return { content: [{ type: "text", text: "ok" }] }; + }, + }); + expect(seen.modelId).toBe("claude-opus-4-6"); + }); +}); + describe("writeTriageDecisionReport", () => { test("writes to .sf/triage/decisions/.md and returns the path", () => { const dir = makeForgeProject();