feat(triage): route runTriage via model-router using operator allowlist

Drops the hardcoded "google-gemini-cli/gemini-3-pro-preview" default and
routes through SF's own model-router using a new
BASE_REQUIREMENTS["self-feedback-triage"] (agentic-heavy: coding 0.4,
instruction 0.8, reasoning 0.8, agentic 0.9).

Candidate selection priority:
  1. Explicit options.model override (operator --model)
  2. options.candidates (test injection)
  3. ~/.sf/agent/settings.json enabledModels (expanded against pi-ai
     MODELS catalog) + defaultProvider/defaultModel
  4. TRIAGE_FALLBACK_CANDIDATES — Chinese-provider set
     (kimi + minimax + zai). Gemini intentionally NOT in the fallback
     so operators who removed it from settings don't silently re-default.

Dispatch walks the router-ranked list with retry-on-credential-error so
the top pick failing on missing API keys falls through to the next
candidate (caught the openai-no-key case in dogfood today).

Closes part 1 of sf-mp5khix3-9beona AC1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-14 16:29:56 +02:00
parent e2dd625d7d
commit 98d1b2b258
3 changed files with 279 additions and 61 deletions

View file

@ -1080,6 +1080,18 @@ export const BASE_REQUIREMENTS = {
"run-uat": { instruction: 0.7, speed: 0.8, agentic: 0.6 },
"discuss-milestone": { reasoning: 0.6, instruction: 0.7, agentic: 0.4 },
"complete-milestone": { instruction: 0.8, reasoning: 0.5, agentic: 0.5 },
// Self-feedback triage is an agentic task: read open entries, reason
// about decisions (fix/promote/close), optionally call resolve_issue.
// Higher agentic weight than execute-task because triage decisions
// involve significant judgment + structured tool calls; lower coding
// weight because the model isn't writing implementation code in this
// dispatch.
"self-feedback-triage": {
coding: 0.4,
instruction: 0.8,
reasoning: 0.8,
agentic: 0.9,
},
};
// ─── Public API ──────────────────────────────────────────────────────────────
/**

View file

@ -338,16 +338,26 @@ export function dispatchSelfFeedbackInlineFixIfNeeded(basePath, ctx, pi) {
*
* Consumer: register-hooks.ts turn_end handler.
*/
/**
 * Default provider/model used by runTriage when --model is not supplied.
 * Matches DEFAULT_REFLECTION_MODEL — both reasoning passes benefit from the
 * same pro tier and route through the operator's persistent gemini-cli
 * session by default.
 */
const DEFAULT_TRIAGE_MODEL = "google-gemini-cli/gemini-3-pro-preview";
// Sentinel substring matched against the assistant's output text; runTriage
// reports cleanFinish=true only when the response contains it.
const TRIAGE_TERMINATOR = "Self-feedback triage complete";
/**
 * Last-resort candidates if reading the operator's settings.json fails. The
 * Chinese-provider set matches typical SF operator config (kimi + minimax +
 * zai). Gemini is intentionally NOT in this list — when the operator has
 * gemini as default they'll have it in settings.json and we'll pick it up
 * from there; hardcoding it here would silently re-default to gemini even
 * for operators who explicitly removed it. The router scores these via
 * BASE_REQUIREMENTS["self-feedback-triage"] before dispatch.
 *
 * Operator --model override always wins; operator settings.json enabledModels
 * always wins over this fallback list.
 */
const TRIAGE_FALLBACK_CANDIDATES = [
  "kimi-coding/kimi-for-coding",
  "minimax/MiniMax-M2.7",
  "zai/glm-5",
];
function parseTriageModelString(input) {
if (typeof input !== "string") return null;
const slash = input.indexOf("/");
@ -368,6 +378,110 @@ async function resolveTriageModel(providerModelString) {
}
}
/**
 * Read the operator's enabledModels allowlist from ~/.sf/agent/settings.json
 * and expand "provider/*" wildcards against @singularity-forge/ai's MODELS
 * catalog. The defaultProvider/defaultModel pair from the same settings is
 * always included too, so the operator's chosen default is never silently
 * dropped.
 *
 * Returns null on any failure — callers fall back to
 * TRIAGE_FALLBACK_CANDIDATES.
 */
async function readOperatorTriageCandidates() {
  try {
    const { getSfAgentSettingsPath } = await import("./preferences.js");
    const settingsPath = getSfAgentSettingsPath();
    if (!existsSync(settingsPath)) return null;

    const settings = JSON.parse(readFileSync(settingsPath, "utf-8"));
    const { defaultProvider, defaultModel } = settings ?? {};
    const allowlist = Array.isArray(settings?.enabledModels)
      ? settings.enabledModels
      : [];

    const collected = new Set();
    if (defaultProvider && defaultModel) {
      collected.add(`${defaultProvider}/${defaultModel}`);
    }

    // Exact "provider/modelId" entries pass straight through; "provider/*"
    // wildcards are expanded by enumerating the pi-ai MODELS catalog via
    // getModelsByProvider (the canonical accessor). If that accessor is
    // unavailable, wildcard entries are skipped.
    const ai = await import("@singularity-forge/ai");
    for (const raw of allowlist) {
      if (typeof raw !== "string") continue;
      const sep = raw.indexOf("/");
      if (sep === -1) continue;
      const provider = raw.slice(0, sep);
      if (raw.slice(sep + 1) !== "*") {
        collected.add(raw);
      } else if (typeof ai.getModelsByProvider === "function") {
        for (const entry of ai.getModelsByProvider(provider) ?? []) {
          if (entry?.id) collected.add(`${provider}/${entry.id}`);
        }
      }
    }
    return collected.size > 0 ? [...collected] : null;
  } catch {
    return null;
  }
}
/**
 * Score the candidate list via SF's model-router using the
 * self-feedback-triage requirements profile (agentic-heavy). Returns the
 * "provider/modelId" strings in best-first order. Used by runTriage for
 * dispatch-with-fallback when the top pick lacks credentials.
 *
 * Priority order (sf-mp5khix3-9beona AC1 + user correction):
 *   1. explicit candidates passed by caller (test injection)
 *   2. operator's settings.json enabledModels (expanded) + defaultModel
 *   3. TRIAGE_FALLBACK_CANDIDATES only when settings.json is missing or
 *      yields no usable models. This is the Chinese-provider set (kimi +
 *      minimax + zai) that matches typical operator config — gemini is
 *      intentionally excluded — NOT a universal model dump.
 *
 * If router scoring itself fails (module load or scoring error), or yields
 * no usable ids, the candidate list is returned unranked so dispatch can
 * still proceed.
 *
 * Consumer: runTriage (when operator doesn't pass --model).
 */
async function rankTriageModelsViaRouter(candidates) {
  const candidateList =
    (Array.isArray(candidates) && candidates.length > 0
      ? candidates
      : await readOperatorTriageCandidates()) ?? TRIAGE_FALLBACK_CANDIDATES;
  try {
    const { BASE_REQUIREMENTS, scoreEligibleModels } = await import(
      "./model-router.js"
    );
    const ranked = scoreEligibleModels(
      candidateList,
      BASE_REQUIREMENTS["self-feedback-triage"],
    );
    // Defensive: an empty ranking means the router recognized none of the
    // candidates — keep the caller's order rather than dispatching nothing.
    const ids = ranked.map((r) => r.modelId).filter(Boolean);
    return ids.length > 0 ? ids : candidateList;
  } catch {
    return candidateList;
  }
}
/**
 * Heuristic: detect provider-credential failures that warrant trying the
 * next-best candidate instead of surfacing the error. We match on
 * substrings that pi-ai providers actually emit on missing-key paths.
 */
function isCredentialError(message) {
  if (typeof message !== "string") return false;
  const credentialPatterns = [
    /no api key/i,
    /missing api key/i,
    /api[_ ]?key.*not set/i,
    /unauthorized/i,
    /authentication failed/i,
    /not authenticated/i,
    /credentials/i,
  ];
  return credentialPatterns.some((pattern) => pattern.test(message));
}
function extractAssistantText(message) {
const content = message?.content;
if (!Array.isArray(content)) return "";
@ -407,12 +521,21 @@ export function writeTriageDecisionReport(basePath, content) {
/**
* Run a triage pass against the canonical inline-fix prompt.
*
* Provider-agnostic: routes through @singularity-forge/ai's completeSimple,
mirroring runReflection. Captures the model's decision text — the actual
fix/promote/close actions are operator-applied (or, in a follow-up, a
* tool-enabled variant will let the model call resolve_issue directly).
* Model selection (sf-mp5khix3-9beona AC1): when options.model is supplied
* the operator's choice wins. Otherwise, route through SF's model-router
* using BASE_REQUIREMENTS["self-feedback-triage"] (agentic-heavy) over the
operator-derived candidate list. Falls back to TRIAGE_FALLBACK_CANDIDATES
when settings yield nothing, and to the unranked candidate list if scoring
fails. No more hardcoded gemini default.
*
options.model — "provider/modelId" string. Defaults to DEFAULT_TRIAGE_MODEL.
* Provider-agnostic dispatch: routes through @singularity-forge/ai's
completeSimple, mirroring runReflection. Today's path captures decision
text only — the actual fix/promote/close actions are operator-applied.
* A tool-enabled --apply variant (where the model calls resolve_issue
* directly) lands in a follow-up (sf-mp5khix3-9beona AC2).
*
options.model — "provider/modelId" override. Defaults to router pick.
options.candidates — override the candidate list for router-based pick
(mostly useful in tests).
options.complete — dependency injection for tests; same shape as the
reflection runner.
options.timeoutMs — defaults to 8 minutes.
@ -423,24 +546,21 @@ export function writeTriageDecisionReport(basePath, content) {
* Consumer: headless-triage operator surface (--run flag).
*/
export async function runTriage(prompt, options = {}) {
const modelString = options.model ?? DEFAULT_TRIAGE_MODEL;
const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000;
let model;
try {
model = await resolveTriageModel(modelString);
} catch (err) {
return {
ok: false,
error: `failed to load model catalog: ${getErrorMessage(err)}`,
};
}
if (!model) {
return {
ok: false,
error: `unknown model "${modelString}" — expected "provider/modelId" with a model registered in @singularity-forge/ai MODELS`,
};
}
// Build the dispatch order. Operator --model wins as a single-shot
// pick (no fallback). Without --model, walk the router-ranked list and
// fall through to the next candidate on credential errors.
const dispatchOrder = options.model
? [options.model]
: await rankTriageModelsViaRouter(options.candidates);
const completeFn =
options.complete ??
(await (async () => {
const ai = await import("@singularity-forge/ai");
return ai.completeSimple;
})());
const context = {
systemPrompt: undefined,
@ -453,46 +573,84 @@ export async function runTriage(prompt, options = {}) {
],
};
const completeFn =
options.complete ??
(await (async () => {
const ai = await import("@singularity-forge/ai");
return ai.completeSimple;
})());
const callPromise = (async () => {
const attemptOne = async (modelString) => {
let model;
try {
const message = await completeFn(model, context, {});
const content = extractAssistantText(message);
return {
ok: true,
content,
cleanFinish: content.includes(TRIAGE_TERMINATOR),
provider: model.provider,
modelId: model.id,
};
model = await resolveTriageModel(modelString);
} catch (err) {
return {
ok: false,
error: `provider call failed: ${getErrorMessage(err)}`,
provider: model.provider,
modelId: model.id,
error: `failed to load model catalog: ${getErrorMessage(err)}`,
modelString,
};
}
})();
const timeoutPromise = new Promise((resolve) => {
setTimeout(() => {
resolve({
if (!model) {
return {
ok: false,
error: `triage call timed out after ${timeoutMs}ms`,
provider: model.provider,
modelId: model.id,
});
}, timeoutMs);
});
error: `unknown model "${modelString}" — expected "provider/modelId" with a model registered in @singularity-forge/ai MODELS`,
modelString,
};
}
return Promise.race([callPromise, timeoutPromise]);
const callPromise = (async () => {
try {
const message = await completeFn(model, context, {});
const content = extractAssistantText(message);
return {
ok: true,
content,
cleanFinish: content.includes(TRIAGE_TERMINATOR),
provider: model.provider,
modelId: model.id,
};
} catch (err) {
return {
ok: false,
error: `provider call failed: ${getErrorMessage(err)}`,
provider: model.provider,
modelId: model.id,
};
}
})();
const timeoutPromise = new Promise((resolve) => {
setTimeout(() => {
resolve({
ok: false,
error: `triage call timed out after ${timeoutMs}ms`,
provider: model.provider,
modelId: model.id,
});
}, timeoutMs);
});
return Promise.race([callPromise, timeoutPromise]);
};
// Walk the ranked list. Retry-on-credential-error keeps us from giving
// up because the highest-scoring model happens to lack credentials in
// the operator's environment (e.g. router picks openai-codex-responses
// when the operator only has anthropic + gemini-cli auth).
const attempts = [];
for (const modelString of dispatchOrder) {
const result = await attemptOne(modelString);
attempts.push({ modelString, error: result.error });
if (result.ok) return result;
// Only fall through on credential errors. Unknown-model + timeout +
// other errors surface as-is (no point retrying with a different
// model if the prompt itself is broken or the network is down).
if (!isCredentialError(result.error) || dispatchOrder.length <= 1) {
return result;
}
}
// All candidates exhausted on credential errors.
const lastErr = attempts[attempts.length - 1];
return {
ok: false,
error: `provider call failed: no candidate model had usable credentials (tried ${attempts.length}: ${attempts.map((a) => a.modelString).join(", ")}). Last error: ${lastErr?.error ?? "unknown"}`,
modelId: lastErr?.modelString,
};
}
export function consumeCompletedInlineFixClaim(basePath) {

View file

@ -328,6 +328,54 @@ describe("runTriage (dependency-injected)", () => {
});
});
describe("runTriage model routing (sf-mp5khix3-9beona AC1)", () => {
  // Injected complete() stub: records which model runTriage dispatched to,
  // then returns a minimal assistant message so the call succeeds.
  const captureDispatchedModel = (sink) => async (model) => {
    sink.modelId = model.id;
    return { content: [{ type: "text", text: "ok" }] };
  };

  test("router scores candidates by agentic profile and picks the best", async () => {
    // Fixture mirrors the operator's actual allowlist (minimax +
    // kimi-coding) rather than cross-provider examples. MiniMax-M2.1
    // carries a low agentic score (agentic 40, penalized after the
    // 2026-05-13 checkpoint-loop incident) while kimi-for-coding aliases
    // to the kimi-k2.6 profile (agentic 90, per ADR-0079), so the
    // agentic-heavy triage requirements must rank kimi first.
    const dispatched = { modelId: null };
    await runTriage("prompt", {
      candidates: [
        "minimax/MiniMax-M2.1",
        "kimi-coding/kimi-for-coding",
      ],
      complete: captureDispatchedModel(dispatched),
    });
    expect(dispatched.modelId).toBe("kimi-for-coding");
  });

  test("operator --model still wins over router pick", async () => {
    const dispatched = { modelId: null };
    await runTriage("prompt", {
      model: "anthropic/claude-sonnet-4-6",
      complete: captureDispatchedModel(dispatched),
    });
    expect(dispatched.modelId).toBe("claude-sonnet-4-6");
  });

  test("candidates override picks from a custom list", async () => {
    const dispatched = { modelId: null };
    await runTriage("prompt", {
      candidates: ["anthropic/claude-opus-4-6"],
      complete: captureDispatchedModel(dispatched),
    });
    expect(dispatched.modelId).toBe("claude-opus-4-6");
  });
});
describe("writeTriageDecisionReport", () => {
test("writes to .sf/triage/decisions/<ts>.md and returns the path", () => {
const dir = makeForgeProject();