feat(self-feedback-drain): filter free opencode models from triage routing

Self-feedback triage routing was including paid opencode models even
when the operator policy prefers the free tier. Add
isOpenCodeProvider() + isFreeOpenCodeModelId() and filter the
candidate list before the router scores them.

Also: cosmetic — quote style normalised by the formatter on
buildInlineFixPrompt strings and spawn options object.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-15 16:37:24 +02:00
parent 3a14fe86a7
commit fdc4650016
2 changed files with 170 additions and 45 deletions

View file

@ -265,10 +265,10 @@ export function buildInlineFixPrompt(entries) {
"",
"Decision procedure:",
"1. For each entry: verify the claim still applies against the current code. If it doesn't, that's outcome C.",
"2. If outcome A (fix): make the smallest coherent code/docs/tests change. Run focused verification and typecheck for touched areas. Commit the fix with a conventional message. Then call `resolve_issue` with evidence `{kind: \"agent-fix\", commitSha: <sha>}` and a `reason` naming what landed.",
"3. If outcome B (promote): call `resolve_issue` with evidence `{kind: \"promoted-to-requirement\", requirementId: <id>}` after ensuring the requirement row exists. Use this when the entry describes long-running policy work, not a code fix.",
"4. If outcome C (close): call `resolve_issue` with evidence `{kind: \"human-clear\"}` and a `reason` that names WHY the entry is no longer of value (stale, superseded by <commit>, false positive, out-of-scope, etc.). Be specific — a future reader should be able to tell whether re-opening makes sense.",
"5. Never use evidence `{kind: \"auto-version-bump\"}` for these decisions — that kind is reserved for the automatic version-bump resolver and would re-open under the credibility check.",
'2. If outcome A (fix): make the smallest coherent code/docs/tests change. Run focused verification and typecheck for touched areas. Commit the fix with a conventional message. Then call `resolve_issue` with evidence `{kind: "agent-fix", commitSha: <sha>}` and a `reason` naming what landed.',
'3. If outcome B (promote): call `resolve_issue` with evidence `{kind: "promoted-to-requirement", requirementId: <id>}` after ensuring the requirement row exists. Use this when the entry describes long-running policy work, not a code fix.',
'4. If outcome C (close): call `resolve_issue` with evidence `{kind: "human-clear"}` and a `reason` that names WHY the entry is no longer of value (stale, superseded by <commit>, false positive, out-of-scope, etc.). Be specific — a future reader should be able to tell whether re-opening makes sense.',
'5. Never use evidence `{kind: "auto-version-bump"}` for these decisions — that kind is reserved for the automatic version-bump resolver and would re-open under the credibility check.',
"6. Do not hand-edit `.sf/self-feedback.jsonl` or `.sf/SELF-FEEDBACK.md`. Always go through `resolve_issue` so the DB, markdown projection, and reload detection stay consistent.",
"",
"When every entry has a decision, say: Self-feedback triage complete.",
@ -336,7 +336,12 @@ export function dispatchSelfFeedbackInlineFixIfNeeded(basePath, ctx, pi) {
const child = spawn(
process.execPath,
[sfBin, "headless", "triage", "--apply", "--json"],
{ cwd: basePath, stdio: ["ignore", "ignore", "ignore"], detached: true, env },
{
cwd: basePath,
stdio: ["ignore", "ignore", "ignore"],
detached: true,
env,
},
);
child.on("error", (err) => {
writeFailedClaim(basePath, ids, getErrorMessage(err));
@ -415,6 +420,41 @@ function parseTriageModelString(input) {
return [input.slice(0, slash), input.slice(slash + 1)];
}
function isOpenCodeProvider(provider) {
return provider === "opencode";
}
function isFreeOpenCodeModelId(modelId) {
const id = String(modelId ?? "").toLowerCase();
return id.includes(":free") || /(?:^|-)free(?:-|$)/.test(id);
}
/**
* Filter triage model candidates through provider-specific operator policy.
*
* Purpose: enforce the operator's cost boundary before the router scores
* candidates. `opencode/*` in settings means "all free OpenCode models", not
* "all OpenCode billing models"; otherwise triage can pick paid GPT routes like
* `opencode/gpt-5.1` just because they score high on agentic capability.
* `opencode-go/*` is intentionally not filtered here because that provider's
* catalog is treated as free by policy.
*
* Consumer: readOperatorTriageCandidates and rankTriageModelsViaRouter.
*
* @param {string[]} candidates
* @returns {string[]}
*/
export function filterTriageCandidatesByProviderPolicy(candidates) {
if (!Array.isArray(candidates)) return [];
return candidates.filter((candidate) => {
const parsed = parseTriageModelString(candidate);
if (!parsed) return false;
const [provider, modelId] = parsed;
if (!isOpenCodeProvider(provider)) return true;
return isFreeOpenCodeModelId(modelId);
});
}
async function resolveTriageModel(providerModelString) {
const parsed = parseTriageModelString(providerModelString);
if (!parsed) return null;
@ -463,7 +503,11 @@ async function readOperatorTriageCandidates() {
const provider = entry.slice(0, slash);
const modelGlob = entry.slice(slash + 1);
if (modelGlob !== "*") {
result.add(entry);
for (const candidate of filterTriageCandidatesByProviderPolicy([
entry,
])) {
result.add(candidate);
}
continue;
}
// Wildcard: enumerate models registered under this provider via
@ -473,7 +517,12 @@ async function readOperatorTriageCandidates() {
if (typeof ai.getModels === "function") {
const models = ai.getModels(provider) ?? [];
for (const m of models) {
if (m?.id) result.add(`${provider}/${m.id}`);
if (!m?.id) continue;
for (const candidate of filterTriageCandidatesByProviderPolicy([
`${provider}/${m.id}`,
])) {
result.add(candidate);
}
}
}
}
@ -502,22 +551,26 @@ async function readOperatorTriageCandidates() {
* operator doesn't pass --model). Also used by the deprecated runTriage.
*/
export async function rankTriageModelsViaRouter(candidates) {
const candidateList =
const rawCandidateList =
(Array.isArray(candidates) && candidates.length > 0
? candidates
: await readOperatorTriageCandidates()) ?? TRIAGE_FALLBACK_CANDIDATES;
const candidateList =
filterTriageCandidatesByProviderPolicy(rawCandidateList);
const effectiveCandidateList =
candidateList.length > 0 ? candidateList : TRIAGE_FALLBACK_CANDIDATES;
try {
const { BASE_REQUIREMENTS, scoreEligibleModels } = await import(
"./model-router.js"
);
const ranked = scoreEligibleModels(
candidateList,
effectiveCandidateList,
BASE_REQUIREMENTS["self-feedback-triage"],
);
const ids = ranked.map((r) => r.modelId).filter(Boolean);
return ids.length > 0 ? ids : candidateList;
return ids.length > 0 ? ids : effectiveCandidateList;
} catch {
return candidateList;
return effectiveCandidateList;
}
}
@ -528,10 +581,8 @@ export async function rankTriageModelsViaRouter(candidates) {
*/
function isCredentialError(message) {
if (typeof message !== "string") return false;
return (
/no api key|missing api key|api[_ ]?key.*not set|unauthorized|authentication failed|not authenticated|credentials/i.test(
message,
)
return /no api key|missing api key|api[_ ]?key.*not set|unauthorized|authentication failed|not authenticated|credentials/i.test(
message,
);
}

View file

@ -1,14 +1,11 @@
import {
mkdirSync,
mkdtempSync,
rmSync,
writeFileSync,
} from "node:fs";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, expect, test } from "vitest";
import {
buildInlineFixPrompt,
filterTriageCandidatesByProviderPolicy,
rankTriageModelsViaRouter,
runTriage,
selectInlineFixCandidates,
writeTriageDecisionReport,
@ -30,7 +27,11 @@ function makeForgeProject() {
function writeEntries(basePath, entries) {
const path = join(basePath, ".sf", "self-feedback.jsonl");
writeFileSync(path, `${entries.map((e) => JSON.stringify(e)).join("\n")}\n`, "utf-8");
writeFileSync(
path,
`${entries.map((e) => JSON.stringify(e)).join("\n")}\n`,
"utf-8",
);
}
function entry(overrides = {}) {
@ -69,23 +70,58 @@ describe("selectInlineFixCandidates", () => {
test("selects every open forge-local entry regardless of severity, blocking, or kind (sorted by prioritization)", () => {
const dir = makeForgeProject();
writeEntries(dir, [
entry({ id: "hi-block", severity: "high", blocking: true, ts: "2026-05-01T00:00:00Z" }),
entry({ id: "crit-block", severity: "critical", blocking: true, kind: "runaway-loop:foo", ts: "2026-05-02T00:00:00Z" }),
entry({ id: "med-block", severity: "medium", blocking: true, kind: "inconsistency:bar", ts: "2026-05-03T00:00:00Z" }),
entry({ id: "med-nonblock", severity: "medium", blocking: false, kind: "janitor-gap:baz", ts: "2026-05-04T00:00:00Z" }),
entry({ id: "low-nonblock", severity: "low", blocking: false, kind: "gap:tiebreak", ts: "2026-05-05T00:00:00Z" }),
entry({ id: "arch", severity: "medium", blocking: false, kind: "architecture-defect:foo", ts: "2026-05-06T00:00:00Z" }),
entry({
id: "hi-block",
severity: "high",
blocking: true,
ts: "2026-05-01T00:00:00Z",
}),
entry({
id: "crit-block",
severity: "critical",
blocking: true,
kind: "runaway-loop:foo",
ts: "2026-05-02T00:00:00Z",
}),
entry({
id: "med-block",
severity: "medium",
blocking: true,
kind: "inconsistency:bar",
ts: "2026-05-03T00:00:00Z",
}),
entry({
id: "med-nonblock",
severity: "medium",
blocking: false,
kind: "janitor-gap:baz",
ts: "2026-05-04T00:00:00Z",
}),
entry({
id: "low-nonblock",
severity: "low",
blocking: false,
kind: "gap:tiebreak",
ts: "2026-05-05T00:00:00Z",
}),
entry({
id: "arch",
severity: "medium",
blocking: false,
kind: "architecture-defect:foo",
ts: "2026-05-06T00:00:00Z",
}),
]);
const ids = selectInlineFixCandidates(dir).map((e) => e.id);
// Order: critical (95) > high (80) > medium (50) > low (20).
// Within same impact, ts asc breaks the tie.
expect(ids).toEqual([
"crit-block", // critical / 95
"hi-block", // high / 80
"med-block", // medium / 50, oldest
"med-nonblock", // medium / 50
"arch", // medium / 50, newest
"low-nonblock", // low / 20
"crit-block", // critical / 95
"hi-block", // high / 80
"med-block", // medium / 50, oldest
"med-nonblock", // medium / 50
"arch", // medium / 50, newest
"low-nonblock", // low / 20
]);
});
@ -110,7 +146,10 @@ describe("selectInlineFixCandidates", () => {
id: "promoted",
kind: "gap:baz",
resolvedAt: "2026-05-10T00:00:00Z",
resolvedEvidence: { kind: "promoted-to-requirement", requirementId: "R1" },
resolvedEvidence: {
kind: "promoted-to-requirement",
requirementId: "R1",
},
}),
]);
expect(selectInlineFixCandidates(dir)).toEqual([]);
@ -218,11 +257,11 @@ describe("selectInlineFixCandidates", () => {
]);
const ids = selectInlineFixCandidates(dir).map((e) => e.id);
expect(ids).toEqual([
"high-severity-quick", // high impact + effort=1 wins
"high-severity-newer", // high impact + effort=undefined→3
"high-severity-slow", // high impact + effort=5
"medium-newest", // medium impact
"low-severity-oldest", // low impact loses despite older ts
"high-severity-quick", // high impact + effort=1 wins
"high-severity-newer", // high impact + effort=undefined→3
"high-severity-slow", // high impact + effort=5
"medium-newest", // medium impact
"low-severity-oldest", // low impact loses despite older ts
]);
});
@ -249,6 +288,41 @@ describe("selectInlineFixCandidates", () => {
});
});
describe("triage model candidate policy", () => {
test("filterTriageCandidatesByProviderPolicy_removes_paid_opencode_models_but_keeps_opencode_go", () => {
expect(
filterTriageCandidatesByProviderPolicy([
"opencode/gpt-5.1",
"opencode/minimax-m2.5-free",
"opencode/qwen3-coder:free",
"opencode-go/glm-5",
"opencode-go/nemotron-3-super-free",
"kimi-coding/kimi-k2.6",
]),
).toEqual([
"opencode/minimax-m2.5-free",
"opencode/qwen3-coder:free",
"opencode-go/glm-5",
"opencode-go/nemotron-3-super-free",
"kimi-coding/kimi-k2.6",
]);
});
test("rankTriageModelsViaRouter_when_candidates_include_paid_opencode_never_returns_them", async () => {
const ranked = await rankTriageModelsViaRouter([
"opencode/gpt-5.1",
"opencode/gpt-5.2",
"opencode/minimax-m2.5-free",
"kimi-coding/kimi-for-coding",
"minimax/MiniMax-M2.7",
]);
expect(ranked).not.toContain("opencode/gpt-5.1");
expect(ranked).not.toContain("opencode/gpt-5.2");
expect(ranked).toContain("opencode/minimax-m2.5-free");
});
});
describe("buildInlineFixPrompt", () => {
// Exported (sf-mp4rxkwb-l4baga partial) so headless-triage.ts can render
// the canonical triage prompt without going through the followUp dispatch
@ -284,7 +358,10 @@ describe("runTriage (dependency-injected)", () => {
test("returns ok+content on success and flags clean-finish from terminator", async () => {
const fakeMessage = {
content: [
{ type: "text", text: "decision: close.\nSelf-feedback triage complete" },
{
type: "text",
text: "decision: close.\nSelf-feedback triage complete",
},
],
};
const result = await runTriage("triage prompt", {
@ -339,10 +416,7 @@ describe("runTriage model routing (sf-mp5khix3-9beona AC1)", () => {
// With agentic-heavy requirements the router must pick kimi.
const seen = { modelId: null };
await runTriage("prompt", {
candidates: [
"minimax/MiniMax-M2.1",
"kimi-coding/kimi-for-coding",
],
candidates: ["minimax/MiniMax-M2.1", "kimi-coding/kimi-for-coding"],
complete: async (model) => {
seen.modelId = model.id;
return { content: [{ type: "text", text: "ok" }] };