diff --git a/src/resources/extensions/sf/self-feedback-drain.js b/src/resources/extensions/sf/self-feedback-drain.js index 1e0a1e730..fee7e88ab 100644 --- a/src/resources/extensions/sf/self-feedback-drain.js +++ b/src/resources/extensions/sf/self-feedback-drain.js @@ -265,10 +265,10 @@ export function buildInlineFixPrompt(entries) { "", "Decision procedure:", "1. For each entry: verify the claim still applies against the current code. If it doesn't, that's outcome C.", - "2. If outcome A (fix): make the smallest coherent code/docs/tests change. Run focused verification and typecheck for touched areas. Commit the fix with a conventional message. Then call `resolve_issue` with evidence `{kind: \"agent-fix\", commitSha: }` and a `reason` naming what landed.", - "3. If outcome B (promote): call `resolve_issue` with evidence `{kind: \"promoted-to-requirement\", requirementId: }` after ensuring the requirement row exists. Use this when the entry describes long-running policy work, not a code fix.", - "4. If outcome C (close): call `resolve_issue` with evidence `{kind: \"human-clear\"}` and a `reason` that names WHY the entry is no longer of value (stale, superseded by , false positive, out-of-scope, etc.). Be specific — a future reader should be able to tell whether re-opening makes sense.", - "5. Never use evidence `{kind: \"auto-version-bump\"}` for these decisions — that kind is reserved for the automatic version-bump resolver and would re-open under the credibility check.", + '2. If outcome A (fix): make the smallest coherent code/docs/tests change. Run focused verification and typecheck for touched areas. Commit the fix with a conventional message. Then call `resolve_issue` with evidence `{kind: "agent-fix", commitSha: }` and a `reason` naming what landed.', + '3. If outcome B (promote): call `resolve_issue` with evidence `{kind: "promoted-to-requirement", requirementId: }` after ensuring the requirement row exists. Use this when the entry describes long-running policy work, not a code fix.', + '4. If outcome C (close): call `resolve_issue` with evidence `{kind: "human-clear"}` and a `reason` that names WHY the entry is no longer of value (stale, superseded by , false positive, out-of-scope, etc.). Be specific — a future reader should be able to tell whether re-opening makes sense.', + '5. Never use evidence `{kind: "auto-version-bump"}` for these decisions — that kind is reserved for the automatic version-bump resolver and would re-open under the credibility check.', "6. Do not hand-edit `.sf/self-feedback.jsonl` or `.sf/SELF-FEEDBACK.md`. Always go through `resolve_issue` so the DB, markdown projection, and reload detection stay consistent.", "", "When every entry has a decision, say: Self-feedback triage complete.", @@ -336,7 +336,12 @@ export function dispatchSelfFeedbackInlineFixIfNeeded(basePath, ctx, pi) { const child = spawn( process.execPath, [sfBin, "headless", "triage", "--apply", "--json"], - { cwd: basePath, stdio: ["ignore", "ignore", "ignore"], detached: true, env }, + { + cwd: basePath, + stdio: ["ignore", "ignore", "ignore"], + detached: true, + env, + }, ); child.on("error", (err) => { writeFailedClaim(basePath, ids, getErrorMessage(err)); @@ -415,6 +420,41 @@ function parseTriageModelString(input) { return [input.slice(0, slash), input.slice(slash + 1)]; } +function isOpenCodeProvider(provider) { + return provider === "opencode"; +} + +function isFreeOpenCodeModelId(modelId) { + const id = String(modelId ?? "").toLowerCase(); + return id.includes(":free") || /(?:^|-)free(?:-|$)/.test(id); +} + +/** + * Filter triage model candidates through provider-specific operator policy. + * + * Purpose: enforce the operator's cost boundary before the router scores + * candidates. `opencode/*` in settings means "all free OpenCode models", not + * "all OpenCode billing models"; otherwise triage can pick paid GPT routes like + * `opencode/gpt-5.1` just because they score high on agentic capability. + * `opencode-go/*` is intentionally not filtered here because that provider's + * catalog is treated as free by policy. + * + * Consumer: readOperatorTriageCandidates and rankTriageModelsViaRouter. + * + * @param {string[]} candidates + * @returns {string[]} + */ +export function filterTriageCandidatesByProviderPolicy(candidates) { + if (!Array.isArray(candidates)) return []; + return candidates.filter((candidate) => { + const parsed = parseTriageModelString(candidate); + if (!parsed) return false; + const [provider, modelId] = parsed; + if (!isOpenCodeProvider(provider)) return true; + return isFreeOpenCodeModelId(modelId); + }); +} + async function resolveTriageModel(providerModelString) { const parsed = parseTriageModelString(providerModelString); if (!parsed) return null; @@ -463,7 +503,11 @@ async function readOperatorTriageCandidates() { const provider = entry.slice(0, slash); const modelGlob = entry.slice(slash + 1); if (modelGlob !== "*") { - result.add(entry); + for (const candidate of filterTriageCandidatesByProviderPolicy([ + entry, + ])) { + result.add(candidate); + } continue; } // Wildcard: enumerate models registered under this provider via @@ -473,7 +517,12 @@ async function readOperatorTriageCandidates() { if (typeof ai.getModels === "function") { const models = ai.getModels(provider) ?? []; for (const m of models) { - if (m?.id) result.add(`${provider}/${m.id}`); + if (!m?.id) continue; + for (const candidate of filterTriageCandidatesByProviderPolicy([ + `${provider}/${m.id}`, + ])) { + result.add(candidate); + } } } } @@ -502,22 +551,26 @@ async function readOperatorTriageCandidates() { * operator doesn't pass --model). Also used by the deprecated runTriage. */ export async function rankTriageModelsViaRouter(candidates) { - const candidateList = + const rawCandidateList = (Array.isArray(candidates) && candidates.length > 0 ? candidates : await readOperatorTriageCandidates()) ?? TRIAGE_FALLBACK_CANDIDATES; + const candidateList = + filterTriageCandidatesByProviderPolicy(rawCandidateList); + const effectiveCandidateList = + candidateList.length > 0 ? candidateList : TRIAGE_FALLBACK_CANDIDATES; try { const { BASE_REQUIREMENTS, scoreEligibleModels } = await import( "./model-router.js" ); const ranked = scoreEligibleModels( - candidateList, + effectiveCandidateList, BASE_REQUIREMENTS["self-feedback-triage"], ); const ids = ranked.map((r) => r.modelId).filter(Boolean); - return ids.length > 0 ? ids : candidateList; + return ids.length > 0 ? ids : effectiveCandidateList; } catch { - return candidateList; + return effectiveCandidateList; } } @@ -528,10 +581,8 @@ export async function rankTriageModelsViaRouter(candidates) { */ function isCredentialError(message) { if (typeof message !== "string") return false; - return ( - /no api key|missing api key|api[_ ]?key.*not set|unauthorized|authentication failed|not authenticated|credentials/i.test( - message, - ) + return /no api key|missing api key|api[_ ]?key.*not set|unauthorized|authentication failed|not authenticated|credentials/i.test( + message, ); } diff --git a/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs b/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs index ea68276d6..46c8b62b0 100644 --- a/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs +++ b/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs @@ -1,14 +1,11 @@ -import { - mkdirSync, - mkdtempSync, - rmSync, - writeFileSync, -} from "node:fs"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { afterEach, describe, expect, test } from "vitest"; import { buildInlineFixPrompt, + filterTriageCandidatesByProviderPolicy, + rankTriageModelsViaRouter, runTriage, selectInlineFixCandidates, writeTriageDecisionReport, @@ -30,7 +27,11 @@ function makeForgeProject() { function writeEntries(basePath, entries) { const path = join(basePath, ".sf", "self-feedback.jsonl"); - writeFileSync(path, `${entries.map((e) => JSON.stringify(e)).join("\n")}\n`, "utf-8"); + writeFileSync( + path, + `${entries.map((e) => JSON.stringify(e)).join("\n")}\n`, + "utf-8", + ); } function entry(overrides = {}) { @@ -69,23 +70,58 @@ describe("selectInlineFixCandidates", () => { test("selects every open forge-local entry regardless of severity, blocking, or kind (sorted by prioritization)", () => { const dir = makeForgeProject(); writeEntries(dir, [ - entry({ id: "hi-block", severity: "high", blocking: true, ts: "2026-05-01T00:00:00Z" }), - entry({ id: "crit-block", severity: "critical", blocking: true, kind: "runaway-loop:foo", ts: "2026-05-02T00:00:00Z" }), - entry({ id: "med-block", severity: "medium", blocking: true, kind: "inconsistency:bar", ts: "2026-05-03T00:00:00Z" }), - entry({ id: "med-nonblock", severity: "medium", blocking: false, kind: "janitor-gap:baz", ts: "2026-05-04T00:00:00Z" }), - entry({ id: "low-nonblock", severity: "low", blocking: false, kind: "gap:tiebreak", ts: "2026-05-05T00:00:00Z" }), - entry({ id: "arch", severity: "medium", blocking: false, kind: "architecture-defect:foo", ts: "2026-05-06T00:00:00Z" }), + entry({ + id: "hi-block", + severity: "high", + blocking: true, + ts: "2026-05-01T00:00:00Z", + }), + entry({ + id: "crit-block", + severity: "critical", + blocking: true, + kind: "runaway-loop:foo", + ts: "2026-05-02T00:00:00Z", + }), + entry({ + id: "med-block", + severity: "medium", + blocking: true, + kind: "inconsistency:bar", + ts: "2026-05-03T00:00:00Z", + }), + entry({ + id: "med-nonblock", + severity: "medium", + blocking: false, + kind: "janitor-gap:baz", + ts: "2026-05-04T00:00:00Z", + }), + entry({ + id: "low-nonblock", + severity: "low", + blocking: false, + kind: "gap:tiebreak", + ts: "2026-05-05T00:00:00Z", + }), + entry({ + id: "arch", + severity: "medium", + blocking: false, + kind: "architecture-defect:foo", + ts: "2026-05-06T00:00:00Z", + }), ]); const ids = selectInlineFixCandidates(dir).map((e) => e.id); // Order: critical (95) > high (80) > medium (50) > low (20). // Within same impact, ts asc breaks the tie. expect(ids).toEqual([ - "crit-block", // critical / 95 - "hi-block", // high / 80 - "med-block", // medium / 50, oldest - "med-nonblock", // medium / 50 - "arch", // medium / 50, newest - "low-nonblock", // low / 20 + "crit-block", // critical / 95 + "hi-block", // high / 80 + "med-block", // medium / 50, oldest + "med-nonblock", // medium / 50 + "arch", // medium / 50, newest + "low-nonblock", // low / 20 ]); }); @@ -110,7 +146,10 @@ describe("selectInlineFixCandidates", () => { id: "promoted", kind: "gap:baz", resolvedAt: "2026-05-10T00:00:00Z", - resolvedEvidence: { kind: "promoted-to-requirement", requirementId: "R1" }, + resolvedEvidence: { + kind: "promoted-to-requirement", + requirementId: "R1", + }, }), ]); expect(selectInlineFixCandidates(dir)).toEqual([]); @@ -218,11 +257,11 @@ describe("selectInlineFixCandidates", () => { ]); const ids = selectInlineFixCandidates(dir).map((e) => e.id); expect(ids).toEqual([ - "high-severity-quick", // high impact + effort=1 wins - "high-severity-newer", // high impact + effort=undefined→3 - "high-severity-slow", // high impact + effort=5 - "medium-newest", // medium impact - "low-severity-oldest", // low impact loses despite older ts + "high-severity-quick", // high impact + effort=1 wins + "high-severity-newer", // high impact + effort=undefined→3 + "high-severity-slow", // high impact + effort=5 + "medium-newest", // medium impact + "low-severity-oldest", // low impact loses despite older ts ]); }); @@ -249,6 +288,41 @@ describe("selectInlineFixCandidates", () => { }); }); +describe("triage model candidate policy", () => { + test("filterTriageCandidatesByProviderPolicy_removes_paid_opencode_models_but_keeps_opencode_go", () => { + expect( + filterTriageCandidatesByProviderPolicy([ + "opencode/gpt-5.1", + "opencode/minimax-m2.5-free", + "opencode/qwen3-coder:free", + "opencode-go/glm-5", + "opencode-go/nemotron-3-super-free", + "kimi-coding/kimi-k2.6", + ]), + ).toEqual([ + "opencode/minimax-m2.5-free", + "opencode/qwen3-coder:free", + "opencode-go/glm-5", + "opencode-go/nemotron-3-super-free", + "kimi-coding/kimi-k2.6", + ]); + }); + + test("rankTriageModelsViaRouter_when_candidates_include_paid_opencode_never_returns_them", async () => { + const ranked = await rankTriageModelsViaRouter([ + "opencode/gpt-5.1", + "opencode/gpt-5.2", + "opencode/minimax-m2.5-free", + "kimi-coding/kimi-for-coding", + "minimax/MiniMax-M2.7", + ]); + + expect(ranked).not.toContain("opencode/gpt-5.1"); + expect(ranked).not.toContain("opencode/gpt-5.2"); + expect(ranked).toContain("opencode/minimax-m2.5-free"); + }); +}); + describe("buildInlineFixPrompt", () => { // Exported (sf-mp4rxkwb-l4baga partial) so headless-triage.ts can render // the canonical triage prompt without going through the followUp dispatch @@ -284,7 +358,10 @@ describe("runTriage (dependency-injected)", () => { test("returns ok+content on success and flags clean-finish from terminator", async () => { const fakeMessage = { content: [ - { type: "text", text: "decision: close.\nSelf-feedback triage complete" }, + { + type: "text", + text: "decision: close.\nSelf-feedback triage complete", + }, ], }; const result = await runTriage("triage prompt", { @@ -339,10 +416,7 @@ describe("runTriage model routing (sf-mp5khix3-9beona AC1)", () => { // With agentic-heavy requirements the router must pick kimi. const seen = { modelId: null }; await runTriage("prompt", { - candidates: [ - "minimax/MiniMax-M2.1", - "kimi-coding/kimi-for-coding", - ], + candidates: ["minimax/MiniMax-M2.1", "kimi-coding/kimi-for-coding"], complete: async (model) => { seen.modelId = model.id; return { content: [{ type: "text", text: "ok" }] };