feat(triage): route runTriage via model-router using operator allowlist
Drops the hardcoded "google-gemini-cli/gemini-3-pro-preview" default and
routes through SF's own model-router using a new
BASE_REQUIREMENTS["self-feedback-triage"] (agentic-heavy: coding 0.4,
instruction 0.8, reasoning 0.8, agentic 0.9).
Candidate selection priority:
1. Explicit options.model override (operator --model)
2. options.candidates (test injection)
3. ~/.sf/agent/settings.json enabledModels (expanded against pi-ai
MODELS catalog) + defaultProvider/defaultModel
4. TRIAGE_FALLBACK_CANDIDATES — Chinese-provider set
(kimi + minimax + zai). Gemini intentionally NOT in the fallback
so operators who removed it from settings don't silently re-default.
Dispatch walks the router-ranked list with retry-on-credential-error so
the top pick failing on missing API keys falls through to the next
candidate (caught the openai-no-key case in dogfood today).
Closes part 1 of sf-mp5khix3-9beona AC1.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e2dd625d7d
commit
98d1b2b258
3 changed files with 279 additions and 61 deletions
|
|
@ -1080,6 +1080,18 @@ export const BASE_REQUIREMENTS = {
|
|||
"run-uat": { instruction: 0.7, speed: 0.8, agentic: 0.6 },
|
||||
"discuss-milestone": { reasoning: 0.6, instruction: 0.7, agentic: 0.4 },
|
||||
"complete-milestone": { instruction: 0.8, reasoning: 0.5, agentic: 0.5 },
|
||||
// Self-feedback triage is an agentic task: read open entries, reason
|
||||
// about decisions (fix/promote/close), optionally call resolve_issue.
|
||||
// Higher agentic weight than execute-task because triage decisions
|
||||
// involve significant judgment + structured tool calls; lower coding
|
||||
// weight because the model isn't writing implementation code in this
|
||||
// dispatch.
|
||||
"self-feedback-triage": {
|
||||
coding: 0.4,
|
||||
instruction: 0.8,
|
||||
reasoning: 0.8,
|
||||
agentic: 0.9,
|
||||
},
|
||||
};
|
||||
// ─── Public API ──────────────────────────────────────────────────────────────
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -338,16 +338,26 @@ export function dispatchSelfFeedbackInlineFixIfNeeded(basePath, ctx, pi) {
|
|||
*
|
||||
* Consumer: register-hooks.ts turn_end handler.
|
||||
*/
|
||||
/**
|
||||
* Default provider/model used by runTriage when --model is not supplied.
|
||||
* Matches DEFAULT_REFLECTION_MODEL — both reasoning passes benefit from the
|
||||
* same pro tier and route through the operator's persistent gemini-cli
|
||||
* session by default.
|
||||
*/
|
||||
const DEFAULT_TRIAGE_MODEL = "google-gemini-cli/gemini-3-pro-preview";
|
||||
|
||||
const TRIAGE_TERMINATOR = "Self-feedback triage complete";
|
||||
|
||||
/**
|
||||
* Last-resort candidates if reading the operator's settings.json fails. The
|
||||
* Chinese-provider set matches typical SF operator config (kimi + minimax +
|
||||
* zai). Gemini is intentionally NOT in this list — when the operator has
|
||||
* gemini as default they'll have it in settings.json and we'll pick it up
|
||||
* from there; hardcoding it here would silently re-default to gemini even
|
||||
* for operators who explicitly removed it. The router scores these via
|
||||
* BASE_REQUIREMENTS["self-feedback-triage"] before dispatch.
|
||||
*
|
||||
* Operator --model override always wins; operator settings.json enabledModels
|
||||
* always wins over this fallback list.
|
||||
*/
|
||||
const TRIAGE_FALLBACK_CANDIDATES = [
|
||||
"kimi-coding/kimi-for-coding",
|
||||
"minimax/MiniMax-M2.7",
|
||||
"zai/glm-5",
|
||||
];
|
||||
|
||||
function parseTriageModelString(input) {
|
||||
if (typeof input !== "string") return null;
|
||||
const slash = input.indexOf("/");
|
||||
|
|
@ -368,6 +378,110 @@ async function resolveTriageModel(providerModelString) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the operator's enabledModels allowlist from ~/.sf/agent/settings.json
|
||||
* and expand "provider/*" wildcards against @singularity-forge/ai's MODELS
|
||||
* catalog. Always also include defaultProvider/defaultModel from the same
|
||||
* settings so the operator's chosen default is never silently dropped.
|
||||
*
|
||||
* Returns null on any failure — callers fall back to TRIAGE_FALLBACK_CANDIDATES.
|
||||
*/
|
||||
async function readOperatorTriageCandidates() {
|
||||
try {
|
||||
const { getSfAgentSettingsPath } = await import("./preferences.js");
|
||||
const path = getSfAgentSettingsPath();
|
||||
if (!existsSync(path)) return null;
|
||||
const settings = JSON.parse(readFileSync(path, "utf-8"));
|
||||
const enabled = Array.isArray(settings?.enabledModels)
|
||||
? settings.enabledModels
|
||||
: [];
|
||||
const defaultProvider = settings?.defaultProvider;
|
||||
const defaultModel = settings?.defaultModel;
|
||||
|
||||
const result = new Set();
|
||||
if (defaultProvider && defaultModel) {
|
||||
result.add(`${defaultProvider}/${defaultModel}`);
|
||||
}
|
||||
|
||||
// Expand wildcards by walking the pi-ai MODELS catalog for matching
|
||||
// provider entries. Exact "provider/modelId" entries pass through.
|
||||
const ai = await import("@singularity-forge/ai");
|
||||
for (const entry of enabled) {
|
||||
if (typeof entry !== "string" || !entry.includes("/")) continue;
|
||||
const slash = entry.indexOf("/");
|
||||
const provider = entry.slice(0, slash);
|
||||
const modelGlob = entry.slice(slash + 1);
|
||||
if (modelGlob !== "*") {
|
||||
result.add(entry);
|
||||
continue;
|
||||
}
|
||||
// Wildcard: enumerate models registered under this provider that
|
||||
// have an agentic profile we can score. getModelsByProvider is the
|
||||
// canonical pi-ai accessor; if it isn't available we just skip.
|
||||
if (typeof ai.getModelsByProvider === "function") {
|
||||
const models = ai.getModelsByProvider(provider) ?? [];
|
||||
for (const m of models) {
|
||||
if (m?.id) result.add(`${provider}/${m.id}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const list = Array.from(result);
|
||||
return list.length > 0 ? list : null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Score the operator-derived candidate list via SF's model-router using the
|
||||
* self-feedback-triage requirements profile (agentic-heavy). Returns the
|
||||
* "provider/modelId" strings in best-first order. Used by runTriage for
|
||||
* dispatch-with-fallback when the top pick lacks credentials.
|
||||
*
|
||||
* Priority order (sf-mp5khix3-9beona AC1 + user correction):
|
||||
* 1. explicit candidates passed by caller (test injection)
|
||||
* 2. operator's settings.json enabledModels (expanded) + defaultModel
|
||||
* 3. TRIAGE_FALLBACK_CANDIDATES — only when settings.json is missing or
|
||||
 * yields no usable models. This is the safe Chinese-provider set that
|
||||
* matches the operator's typical config, NOT a universal model dump.
|
||||
*
|
||||
* Consumer: runTriage (when operator doesn't pass --model).
|
||||
*/
|
||||
async function rankTriageModelsViaRouter(candidates) {
|
||||
const candidateList =
|
||||
(Array.isArray(candidates) && candidates.length > 0
|
||||
? candidates
|
||||
: await readOperatorTriageCandidates()) ?? TRIAGE_FALLBACK_CANDIDATES;
|
||||
try {
|
||||
const { BASE_REQUIREMENTS, scoreEligibleModels } = await import(
|
||||
"./model-router.js"
|
||||
);
|
||||
const ranked = scoreEligibleModels(
|
||||
candidateList,
|
||||
BASE_REQUIREMENTS["self-feedback-triage"],
|
||||
);
|
||||
const ids = ranked.map((r) => r.modelId).filter(Boolean);
|
||||
return ids.length > 0 ? ids : candidateList;
|
||||
} catch {
|
||||
return candidateList;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Heuristic: detect provider-credential failures that warrant trying the
|
||||
* next-best candidate instead of surfacing the error. We match on
|
||||
* substrings that pi-ai providers actually emit on missing-key paths.
|
||||
*/
|
||||
function isCredentialError(message) {
|
||||
if (typeof message !== "string") return false;
|
||||
return (
|
||||
/no api key|missing api key|api[_ ]?key.*not set|unauthorized|authentication failed|not authenticated|credentials/i.test(
|
||||
message,
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
function extractAssistantText(message) {
|
||||
const content = message?.content;
|
||||
if (!Array.isArray(content)) return "";
|
||||
|
|
@ -407,12 +521,21 @@ export function writeTriageDecisionReport(basePath, content) {
|
|||
/**
|
||||
* Run a triage pass against the canonical inline-fix prompt.
|
||||
*
|
||||
* Provider-agnostic: routes through @singularity-forge/ai's completeSimple,
|
||||
* mirroring runReflection. Captures the model's decision text — the actual
|
||||
* fix/promote/close actions are operator-applied (or, in a follow-up, a
|
||||
* tool-enabled variant will let the model call resolve_issue directly).
|
||||
* Model selection (sf-mp5khix3-9beona AC1): when options.model is supplied
|
||||
* the operator's choice wins. Otherwise, route through SF's model-router
|
||||
* using BASE_REQUIREMENTS["self-feedback-triage"] (agentic-heavy) over the
|
||||
 * operator-derived candidate list. Falls back to TRIAGE_FALLBACK_CANDIDATES
|
||||
 * (unscored) if router scoring fails. No more hardcoded gemini default.
|
||||
*
|
||||
* options.model — "provider/modelId" string. Defaults to DEFAULT_TRIAGE_MODEL.
|
||||
* Provider-agnostic dispatch: routes through @singularity-forge/ai's
|
||||
* completeSimple, mirroring runReflection. Today's path captures decision
|
||||
* text only — the actual fix/promote/close actions are operator-applied.
|
||||
* A tool-enabled --apply variant (where the model calls resolve_issue
|
||||
* directly) lands in a follow-up (sf-mp5khix3-9beona AC2).
|
||||
*
|
||||
* options.model — "provider/modelId" override. Defaults to router pick.
|
||||
* options.candidates — override the candidate list for router-based pick
|
||||
* (mostly useful in tests).
|
||||
* options.complete — dependency injection for tests; same shape as the
|
||||
* reflection runner.
|
||||
* options.timeoutMs — defaults to 8 minutes.
|
||||
|
|
@ -423,24 +546,21 @@ export function writeTriageDecisionReport(basePath, content) {
|
|||
* Consumer: headless-triage operator surface (--run flag).
|
||||
*/
|
||||
export async function runTriage(prompt, options = {}) {
|
||||
const modelString = options.model ?? DEFAULT_TRIAGE_MODEL;
|
||||
const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000;
|
||||
|
||||
let model;
|
||||
try {
|
||||
model = await resolveTriageModel(modelString);
|
||||
} catch (err) {
|
||||
return {
|
||||
ok: false,
|
||||
error: `failed to load model catalog: ${getErrorMessage(err)}`,
|
||||
};
|
||||
}
|
||||
if (!model) {
|
||||
return {
|
||||
ok: false,
|
||||
error: `unknown model "${modelString}" — expected "provider/modelId" with a model registered in @singularity-forge/ai MODELS`,
|
||||
};
|
||||
}
|
||||
// Build the dispatch order. Operator --model wins as a single-shot
|
||||
// pick (no fallback). Without --model, walk the router-ranked list and
|
||||
// fall through to the next candidate on credential errors.
|
||||
const dispatchOrder = options.model
|
||||
? [options.model]
|
||||
: await rankTriageModelsViaRouter(options.candidates);
|
||||
|
||||
const completeFn =
|
||||
options.complete ??
|
||||
(await (async () => {
|
||||
const ai = await import("@singularity-forge/ai");
|
||||
return ai.completeSimple;
|
||||
})());
|
||||
|
||||
const context = {
|
||||
systemPrompt: undefined,
|
||||
|
|
@ -453,46 +573,84 @@ export async function runTriage(prompt, options = {}) {
|
|||
],
|
||||
};
|
||||
|
||||
const completeFn =
|
||||
options.complete ??
|
||||
(await (async () => {
|
||||
const ai = await import("@singularity-forge/ai");
|
||||
return ai.completeSimple;
|
||||
})());
|
||||
|
||||
const callPromise = (async () => {
|
||||
const attemptOne = async (modelString) => {
|
||||
let model;
|
||||
try {
|
||||
const message = await completeFn(model, context, {});
|
||||
const content = extractAssistantText(message);
|
||||
return {
|
||||
ok: true,
|
||||
content,
|
||||
cleanFinish: content.includes(TRIAGE_TERMINATOR),
|
||||
provider: model.provider,
|
||||
modelId: model.id,
|
||||
};
|
||||
model = await resolveTriageModel(modelString);
|
||||
} catch (err) {
|
||||
return {
|
||||
ok: false,
|
||||
error: `provider call failed: ${getErrorMessage(err)}`,
|
||||
provider: model.provider,
|
||||
modelId: model.id,
|
||||
error: `failed to load model catalog: ${getErrorMessage(err)}`,
|
||||
modelString,
|
||||
};
|
||||
}
|
||||
})();
|
||||
|
||||
const timeoutPromise = new Promise((resolve) => {
|
||||
setTimeout(() => {
|
||||
resolve({
|
||||
if (!model) {
|
||||
return {
|
||||
ok: false,
|
||||
error: `triage call timed out after ${timeoutMs}ms`,
|
||||
provider: model.provider,
|
||||
modelId: model.id,
|
||||
});
|
||||
}, timeoutMs);
|
||||
});
|
||||
error: `unknown model "${modelString}" — expected "provider/modelId" with a model registered in @singularity-forge/ai MODELS`,
|
||||
modelString,
|
||||
};
|
||||
}
|
||||
|
||||
return Promise.race([callPromise, timeoutPromise]);
|
||||
const callPromise = (async () => {
|
||||
try {
|
||||
const message = await completeFn(model, context, {});
|
||||
const content = extractAssistantText(message);
|
||||
return {
|
||||
ok: true,
|
||||
content,
|
||||
cleanFinish: content.includes(TRIAGE_TERMINATOR),
|
||||
provider: model.provider,
|
||||
modelId: model.id,
|
||||
};
|
||||
} catch (err) {
|
||||
return {
|
||||
ok: false,
|
||||
error: `provider call failed: ${getErrorMessage(err)}`,
|
||||
provider: model.provider,
|
||||
modelId: model.id,
|
||||
};
|
||||
}
|
||||
})();
|
||||
|
||||
const timeoutPromise = new Promise((resolve) => {
|
||||
setTimeout(() => {
|
||||
resolve({
|
||||
ok: false,
|
||||
error: `triage call timed out after ${timeoutMs}ms`,
|
||||
provider: model.provider,
|
||||
modelId: model.id,
|
||||
});
|
||||
}, timeoutMs);
|
||||
});
|
||||
|
||||
return Promise.race([callPromise, timeoutPromise]);
|
||||
};
|
||||
|
||||
// Walk the ranked list. Retry-on-credential-error keeps us from giving
|
||||
// up because the highest-scoring model happens to lack credentials in
|
||||
// the operator's environment (e.g. router picks openai-codex-responses
|
||||
// when the operator only has anthropic + gemini-cli auth).
|
||||
const attempts = [];
|
||||
for (const modelString of dispatchOrder) {
|
||||
const result = await attemptOne(modelString);
|
||||
attempts.push({ modelString, error: result.error });
|
||||
if (result.ok) return result;
|
||||
// Only fall through on credential errors. Unknown-model + timeout +
|
||||
// other errors surface as-is (no point retrying with a different
|
||||
// model if the prompt itself is broken or the network is down).
|
||||
if (!isCredentialError(result.error) || dispatchOrder.length <= 1) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// All candidates exhausted on credential errors.
|
||||
const lastErr = attempts[attempts.length - 1];
|
||||
return {
|
||||
ok: false,
|
||||
error: `provider call failed: no candidate model had usable credentials (tried ${attempts.length}: ${attempts.map((a) => a.modelString).join(", ")}). Last error: ${lastErr?.error ?? "unknown"}`,
|
||||
modelId: lastErr?.modelString,
|
||||
};
|
||||
}
|
||||
|
||||
export function consumeCompletedInlineFixClaim(basePath) {
|
||||
|
|
|
|||
|
|
@ -328,6 +328,54 @@ describe("runTriage (dependency-injected)", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe("runTriage model routing (sf-mp5khix3-9beona AC1)", () => {
|
||||
test("router scores candidates by agentic profile and picks the best", async () => {
|
||||
// Test fixture uses the operator's actual provider allowlist
|
||||
// (minimax + kimi-coding) instead of cross-provider examples:
|
||||
// - MiniMax-M2.1 has agentic 40 (older gen, penalized after
|
||||
// the 60+ checkpoint loop incident on 2026-05-13)
|
||||
// - kimi-for-coding aliases to kimi-k2.6 capability profile,
|
||||
// agentic 90 (pinned autonomous-solver default per ADR-0079)
|
||||
// With agentic-heavy requirements the router must pick kimi.
|
||||
const seen = { modelId: null };
|
||||
await runTriage("prompt", {
|
||||
candidates: [
|
||||
"minimax/MiniMax-M2.1",
|
||||
"kimi-coding/kimi-for-coding",
|
||||
],
|
||||
complete: async (model) => {
|
||||
seen.modelId = model.id;
|
||||
return { content: [{ type: "text", text: "ok" }] };
|
||||
},
|
||||
});
|
||||
expect(seen.modelId).toBe("kimi-for-coding");
|
||||
});
|
||||
|
||||
test("operator --model still wins over router pick", async () => {
|
||||
const seen = { modelId: null };
|
||||
await runTriage("prompt", {
|
||||
model: "anthropic/claude-sonnet-4-6",
|
||||
complete: async (model) => {
|
||||
seen.modelId = model.id;
|
||||
return { content: [{ type: "text", text: "ok" }] };
|
||||
},
|
||||
});
|
||||
expect(seen.modelId).toBe("claude-sonnet-4-6");
|
||||
});
|
||||
|
||||
test("candidates override picks from a custom list", async () => {
|
||||
const seen = { modelId: null };
|
||||
await runTriage("prompt", {
|
||||
candidates: ["anthropic/claude-opus-4-6"],
|
||||
complete: async (model) => {
|
||||
seen.modelId = model.id;
|
||||
return { content: [{ type: "text", text: "ok" }] };
|
||||
},
|
||||
});
|
||||
expect(seen.modelId).toBe("claude-opus-4-6");
|
||||
});
|
||||
});
|
||||
|
||||
describe("writeTriageDecisionReport", () => {
|
||||
test("writes to .sf/triage/decisions/<ts>.md and returns the path", () => {
|
||||
const dir = makeForgeProject();
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue