feat(self-feedback-drain): filter free opencode models from triage routing
Self-feedback triage routing was including paid opencode models even when the operator policy prefers the free tier. Add isOpenCodeProvider() + isFreeOpenCodeModelId() and filter the candidate list before the router scores them. Also: cosmetic — quote style normalised by the formatter on buildInlineFixPrompt strings and spawn options object. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3a14fe86a7
commit
fdc4650016
2 changed files with 170 additions and 45 deletions
|
|
@ -265,10 +265,10 @@ export function buildInlineFixPrompt(entries) {
|
|||
"",
|
||||
"Decision procedure:",
|
||||
"1. For each entry: verify the claim still applies against the current code. If it doesn't, that's outcome C.",
|
||||
"2. If outcome A (fix): make the smallest coherent code/docs/tests change. Run focused verification and typecheck for touched areas. Commit the fix with a conventional message. Then call `resolve_issue` with evidence `{kind: \"agent-fix\", commitSha: <sha>}` and a `reason` naming what landed.",
|
||||
"3. If outcome B (promote): call `resolve_issue` with evidence `{kind: \"promoted-to-requirement\", requirementId: <id>}` after ensuring the requirement row exists. Use this when the entry describes long-running policy work, not a code fix.",
|
||||
"4. If outcome C (close): call `resolve_issue` with evidence `{kind: \"human-clear\"}` and a `reason` that names WHY the entry is no longer of value (stale, superseded by <commit>, false positive, out-of-scope, etc.). Be specific — a future reader should be able to tell whether re-opening makes sense.",
|
||||
"5. Never use evidence `{kind: \"auto-version-bump\"}` for these decisions — that kind is reserved for the automatic version-bump resolver and would re-open under the credibility check.",
|
||||
'2. If outcome A (fix): make the smallest coherent code/docs/tests change. Run focused verification and typecheck for touched areas. Commit the fix with a conventional message. Then call `resolve_issue` with evidence `{kind: "agent-fix", commitSha: <sha>}` and a `reason` naming what landed.',
|
||||
'3. If outcome B (promote): call `resolve_issue` with evidence `{kind: "promoted-to-requirement", requirementId: <id>}` after ensuring the requirement row exists. Use this when the entry describes long-running policy work, not a code fix.',
|
||||
'4. If outcome C (close): call `resolve_issue` with evidence `{kind: "human-clear"}` and a `reason` that names WHY the entry is no longer of value (stale, superseded by <commit>, false positive, out-of-scope, etc.). Be specific — a future reader should be able to tell whether re-opening makes sense.',
|
||||
'5. Never use evidence `{kind: "auto-version-bump"}` for these decisions — that kind is reserved for the automatic version-bump resolver and would re-open under the credibility check.',
|
||||
"6. Do not hand-edit `.sf/self-feedback.jsonl` or `.sf/SELF-FEEDBACK.md`. Always go through `resolve_issue` so the DB, markdown projection, and reload detection stay consistent.",
|
||||
"",
|
||||
"When every entry has a decision, say: Self-feedback triage complete.",
|
||||
|
|
@ -336,7 +336,12 @@ export function dispatchSelfFeedbackInlineFixIfNeeded(basePath, ctx, pi) {
|
|||
const child = spawn(
|
||||
process.execPath,
|
||||
[sfBin, "headless", "triage", "--apply", "--json"],
|
||||
{ cwd: basePath, stdio: ["ignore", "ignore", "ignore"], detached: true, env },
|
||||
{
|
||||
cwd: basePath,
|
||||
stdio: ["ignore", "ignore", "ignore"],
|
||||
detached: true,
|
||||
env,
|
||||
},
|
||||
);
|
||||
child.on("error", (err) => {
|
||||
writeFailedClaim(basePath, ids, getErrorMessage(err));
|
||||
|
|
@ -415,6 +420,41 @@ function parseTriageModelString(input) {
|
|||
return [input.slice(0, slash), input.slice(slash + 1)];
|
||||
}
|
||||
|
||||
function isOpenCodeProvider(provider) {
|
||||
return provider === "opencode";
|
||||
}
|
||||
|
||||
function isFreeOpenCodeModelId(modelId) {
|
||||
const id = String(modelId ?? "").toLowerCase();
|
||||
return id.includes(":free") || /(?:^|-)free(?:-|$)/.test(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter triage model candidates through provider-specific operator policy.
|
||||
*
|
||||
* Purpose: enforce the operator's cost boundary before the router scores
|
||||
* candidates. `opencode/*` in settings means "all free OpenCode models", not
|
||||
* "all OpenCode billing models"; otherwise triage can pick paid GPT routes like
|
||||
* `opencode/gpt-5.1` just because they score high on agentic capability.
|
||||
* `opencode-go/*` is intentionally not filtered here because that provider's
|
||||
* catalog is treated as free by policy.
|
||||
*
|
||||
* Consumer: readOperatorTriageCandidates and rankTriageModelsViaRouter.
|
||||
*
|
||||
* @param {string[]} candidates
|
||||
* @returns {string[]}
|
||||
*/
|
||||
export function filterTriageCandidatesByProviderPolicy(candidates) {
|
||||
if (!Array.isArray(candidates)) return [];
|
||||
return candidates.filter((candidate) => {
|
||||
const parsed = parseTriageModelString(candidate);
|
||||
if (!parsed) return false;
|
||||
const [provider, modelId] = parsed;
|
||||
if (!isOpenCodeProvider(provider)) return true;
|
||||
return isFreeOpenCodeModelId(modelId);
|
||||
});
|
||||
}
|
||||
|
||||
async function resolveTriageModel(providerModelString) {
|
||||
const parsed = parseTriageModelString(providerModelString);
|
||||
if (!parsed) return null;
|
||||
|
|
@ -463,7 +503,11 @@ async function readOperatorTriageCandidates() {
|
|||
const provider = entry.slice(0, slash);
|
||||
const modelGlob = entry.slice(slash + 1);
|
||||
if (modelGlob !== "*") {
|
||||
result.add(entry);
|
||||
for (const candidate of filterTriageCandidatesByProviderPolicy([
|
||||
entry,
|
||||
])) {
|
||||
result.add(candidate);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Wildcard: enumerate models registered under this provider via
|
||||
|
|
@ -473,7 +517,12 @@ async function readOperatorTriageCandidates() {
|
|||
if (typeof ai.getModels === "function") {
|
||||
const models = ai.getModels(provider) ?? [];
|
||||
for (const m of models) {
|
||||
if (m?.id) result.add(`${provider}/${m.id}`);
|
||||
if (!m?.id) continue;
|
||||
for (const candidate of filterTriageCandidatesByProviderPolicy([
|
||||
`${provider}/${m.id}`,
|
||||
])) {
|
||||
result.add(candidate);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -502,22 +551,26 @@ async function readOperatorTriageCandidates() {
|
|||
* operator doesn't pass --model). Also used by the deprecated runTriage.
|
||||
*/
|
||||
export async function rankTriageModelsViaRouter(candidates) {
|
||||
const candidateList =
|
||||
const rawCandidateList =
|
||||
(Array.isArray(candidates) && candidates.length > 0
|
||||
? candidates
|
||||
: await readOperatorTriageCandidates()) ?? TRIAGE_FALLBACK_CANDIDATES;
|
||||
const candidateList =
|
||||
filterTriageCandidatesByProviderPolicy(rawCandidateList);
|
||||
const effectiveCandidateList =
|
||||
candidateList.length > 0 ? candidateList : TRIAGE_FALLBACK_CANDIDATES;
|
||||
try {
|
||||
const { BASE_REQUIREMENTS, scoreEligibleModels } = await import(
|
||||
"./model-router.js"
|
||||
);
|
||||
const ranked = scoreEligibleModels(
|
||||
candidateList,
|
||||
effectiveCandidateList,
|
||||
BASE_REQUIREMENTS["self-feedback-triage"],
|
||||
);
|
||||
const ids = ranked.map((r) => r.modelId).filter(Boolean);
|
||||
return ids.length > 0 ? ids : candidateList;
|
||||
return ids.length > 0 ? ids : effectiveCandidateList;
|
||||
} catch {
|
||||
return candidateList;
|
||||
return effectiveCandidateList;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -528,10 +581,8 @@ export async function rankTriageModelsViaRouter(candidates) {
|
|||
*/
|
||||
function isCredentialError(message) {
|
||||
if (typeof message !== "string") return false;
|
||||
return (
|
||||
/no api key|missing api key|api[_ ]?key.*not set|unauthorized|authentication failed|not authenticated|credentials/i.test(
|
||||
message,
|
||||
)
|
||||
return /no api key|missing api key|api[_ ]?key.*not set|unauthorized|authentication failed|not authenticated|credentials/i.test(
|
||||
message,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +1,11 @@
|
|||
import {
|
||||
mkdirSync,
|
||||
mkdtempSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, expect, test } from "vitest";
|
||||
import {
|
||||
buildInlineFixPrompt,
|
||||
filterTriageCandidatesByProviderPolicy,
|
||||
rankTriageModelsViaRouter,
|
||||
runTriage,
|
||||
selectInlineFixCandidates,
|
||||
writeTriageDecisionReport,
|
||||
|
|
@ -30,7 +27,11 @@ function makeForgeProject() {
|
|||
|
||||
function writeEntries(basePath, entries) {
|
||||
const path = join(basePath, ".sf", "self-feedback.jsonl");
|
||||
writeFileSync(path, `${entries.map((e) => JSON.stringify(e)).join("\n")}\n`, "utf-8");
|
||||
writeFileSync(
|
||||
path,
|
||||
`${entries.map((e) => JSON.stringify(e)).join("\n")}\n`,
|
||||
"utf-8",
|
||||
);
|
||||
}
|
||||
|
||||
function entry(overrides = {}) {
|
||||
|
|
@ -69,23 +70,58 @@ describe("selectInlineFixCandidates", () => {
|
|||
test("selects every open forge-local entry regardless of severity, blocking, or kind (sorted by prioritization)", () => {
|
||||
const dir = makeForgeProject();
|
||||
writeEntries(dir, [
|
||||
entry({ id: "hi-block", severity: "high", blocking: true, ts: "2026-05-01T00:00:00Z" }),
|
||||
entry({ id: "crit-block", severity: "critical", blocking: true, kind: "runaway-loop:foo", ts: "2026-05-02T00:00:00Z" }),
|
||||
entry({ id: "med-block", severity: "medium", blocking: true, kind: "inconsistency:bar", ts: "2026-05-03T00:00:00Z" }),
|
||||
entry({ id: "med-nonblock", severity: "medium", blocking: false, kind: "janitor-gap:baz", ts: "2026-05-04T00:00:00Z" }),
|
||||
entry({ id: "low-nonblock", severity: "low", blocking: false, kind: "gap:tiebreak", ts: "2026-05-05T00:00:00Z" }),
|
||||
entry({ id: "arch", severity: "medium", blocking: false, kind: "architecture-defect:foo", ts: "2026-05-06T00:00:00Z" }),
|
||||
entry({
|
||||
id: "hi-block",
|
||||
severity: "high",
|
||||
blocking: true,
|
||||
ts: "2026-05-01T00:00:00Z",
|
||||
}),
|
||||
entry({
|
||||
id: "crit-block",
|
||||
severity: "critical",
|
||||
blocking: true,
|
||||
kind: "runaway-loop:foo",
|
||||
ts: "2026-05-02T00:00:00Z",
|
||||
}),
|
||||
entry({
|
||||
id: "med-block",
|
||||
severity: "medium",
|
||||
blocking: true,
|
||||
kind: "inconsistency:bar",
|
||||
ts: "2026-05-03T00:00:00Z",
|
||||
}),
|
||||
entry({
|
||||
id: "med-nonblock",
|
||||
severity: "medium",
|
||||
blocking: false,
|
||||
kind: "janitor-gap:baz",
|
||||
ts: "2026-05-04T00:00:00Z",
|
||||
}),
|
||||
entry({
|
||||
id: "low-nonblock",
|
||||
severity: "low",
|
||||
blocking: false,
|
||||
kind: "gap:tiebreak",
|
||||
ts: "2026-05-05T00:00:00Z",
|
||||
}),
|
||||
entry({
|
||||
id: "arch",
|
||||
severity: "medium",
|
||||
blocking: false,
|
||||
kind: "architecture-defect:foo",
|
||||
ts: "2026-05-06T00:00:00Z",
|
||||
}),
|
||||
]);
|
||||
const ids = selectInlineFixCandidates(dir).map((e) => e.id);
|
||||
// Order: critical (95) > high (80) > medium (50) > low (20).
|
||||
// Within same impact, ts asc breaks the tie.
|
||||
expect(ids).toEqual([
|
||||
"crit-block", // critical / 95
|
||||
"hi-block", // high / 80
|
||||
"med-block", // medium / 50, oldest
|
||||
"med-nonblock", // medium / 50
|
||||
"arch", // medium / 50, newest
|
||||
"low-nonblock", // low / 20
|
||||
"crit-block", // critical / 95
|
||||
"hi-block", // high / 80
|
||||
"med-block", // medium / 50, oldest
|
||||
"med-nonblock", // medium / 50
|
||||
"arch", // medium / 50, newest
|
||||
"low-nonblock", // low / 20
|
||||
]);
|
||||
});
|
||||
|
||||
|
|
@ -110,7 +146,10 @@ describe("selectInlineFixCandidates", () => {
|
|||
id: "promoted",
|
||||
kind: "gap:baz",
|
||||
resolvedAt: "2026-05-10T00:00:00Z",
|
||||
resolvedEvidence: { kind: "promoted-to-requirement", requirementId: "R1" },
|
||||
resolvedEvidence: {
|
||||
kind: "promoted-to-requirement",
|
||||
requirementId: "R1",
|
||||
},
|
||||
}),
|
||||
]);
|
||||
expect(selectInlineFixCandidates(dir)).toEqual([]);
|
||||
|
|
@ -218,11 +257,11 @@ describe("selectInlineFixCandidates", () => {
|
|||
]);
|
||||
const ids = selectInlineFixCandidates(dir).map((e) => e.id);
|
||||
expect(ids).toEqual([
|
||||
"high-severity-quick", // high impact + effort=1 wins
|
||||
"high-severity-newer", // high impact + effort=undefined→3
|
||||
"high-severity-slow", // high impact + effort=5
|
||||
"medium-newest", // medium impact
|
||||
"low-severity-oldest", // low impact loses despite older ts
|
||||
"high-severity-quick", // high impact + effort=1 wins
|
||||
"high-severity-newer", // high impact + effort=undefined→3
|
||||
"high-severity-slow", // high impact + effort=5
|
||||
"medium-newest", // medium impact
|
||||
"low-severity-oldest", // low impact loses despite older ts
|
||||
]);
|
||||
});
|
||||
|
||||
|
|
@ -249,6 +288,41 @@ describe("selectInlineFixCandidates", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe("triage model candidate policy", () => {
|
||||
test("filterTriageCandidatesByProviderPolicy_removes_paid_opencode_models_but_keeps_opencode_go", () => {
|
||||
expect(
|
||||
filterTriageCandidatesByProviderPolicy([
|
||||
"opencode/gpt-5.1",
|
||||
"opencode/minimax-m2.5-free",
|
||||
"opencode/qwen3-coder:free",
|
||||
"opencode-go/glm-5",
|
||||
"opencode-go/nemotron-3-super-free",
|
||||
"kimi-coding/kimi-k2.6",
|
||||
]),
|
||||
).toEqual([
|
||||
"opencode/minimax-m2.5-free",
|
||||
"opencode/qwen3-coder:free",
|
||||
"opencode-go/glm-5",
|
||||
"opencode-go/nemotron-3-super-free",
|
||||
"kimi-coding/kimi-k2.6",
|
||||
]);
|
||||
});
|
||||
|
||||
test("rankTriageModelsViaRouter_when_candidates_include_paid_opencode_never_returns_them", async () => {
|
||||
const ranked = await rankTriageModelsViaRouter([
|
||||
"opencode/gpt-5.1",
|
||||
"opencode/gpt-5.2",
|
||||
"opencode/minimax-m2.5-free",
|
||||
"kimi-coding/kimi-for-coding",
|
||||
"minimax/MiniMax-M2.7",
|
||||
]);
|
||||
|
||||
expect(ranked).not.toContain("opencode/gpt-5.1");
|
||||
expect(ranked).not.toContain("opencode/gpt-5.2");
|
||||
expect(ranked).toContain("opencode/minimax-m2.5-free");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildInlineFixPrompt", () => {
|
||||
// Exported (sf-mp4rxkwb-l4baga partial) so headless-triage.ts can render
|
||||
// the canonical triage prompt without going through the followUp dispatch
|
||||
|
|
@ -284,7 +358,10 @@ describe("runTriage (dependency-injected)", () => {
|
|||
test("returns ok+content on success and flags clean-finish from terminator", async () => {
|
||||
const fakeMessage = {
|
||||
content: [
|
||||
{ type: "text", text: "decision: close.\nSelf-feedback triage complete" },
|
||||
{
|
||||
type: "text",
|
||||
text: "decision: close.\nSelf-feedback triage complete",
|
||||
},
|
||||
],
|
||||
};
|
||||
const result = await runTriage("triage prompt", {
|
||||
|
|
@ -339,10 +416,7 @@ describe("runTriage model routing (sf-mp5khix3-9beona AC1)", () => {
|
|||
// With agentic-heavy requirements the router must pick kimi.
|
||||
const seen = { modelId: null };
|
||||
await runTriage("prompt", {
|
||||
candidates: [
|
||||
"minimax/MiniMax-M2.1",
|
||||
"kimi-coding/kimi-for-coding",
|
||||
],
|
||||
candidates: ["minimax/MiniMax-M2.1", "kimi-coding/kimi-for-coding"],
|
||||
complete: async (model) => {
|
||||
seen.modelId = model.id;
|
||||
return { content: [{ type: "text", text: "ok" }] };
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue