diff --git a/src/resources/extensions/sf/auto-model-selection.js b/src/resources/extensions/sf/auto-model-selection.js
index 2f71ab4f5..03a79ab50 100644
--- a/src/resources/extensions/sf/auto-model-selection.js
+++ b/src/resources/extensions/sf/auto-model-selection.js
@@ -69,6 +69,79 @@ function readEnabledModels() {
     return undefined; // settings missing or unreadable → no allowlist constraint
   }
 }
+
+/**
+ * Return true for best-effort/free-tier model routes.
+ *
+ * Purpose: keep autonomous coding work on reliable paid/subscription routes by
+ * default; free SKUs are useful for experiments but have weaker availability and
+ * tool-loop guarantees.
+ *
+ * Consumer: selectAndApplyModel before dynamic routing and fallback hooks build
+ * their candidate pools.
+ *
+ * @param {string | null | undefined} provider - Provider name; nullish reads as "".
+ * @param {string | null | undefined} modelId - Model id; nullish reads as "".
+ * @returns {boolean} True when "free" is a whole segment of
+ *   `provider/modelId`, bounded by start/end, whitespace, or one of - _ : /
+ */
+export function isFreeTierModelRoute(provider, modelId) {
+  const text = `${provider ?? ""}/${modelId ?? ""}`.toLowerCase();
+  // text is already lower-cased, so the pattern needs no case-insensitive flag.
+  return /(^|[-_:/\s])free($|[-_:/\s])/.test(text);
+}
+
+/**
+ * Return true when a unit may use best-effort/free-tier routes.
+ *
+ * Purpose: allow cheap models on supporting work while keeping repo-changing
+ * worker and verification units on reliable routes.
+ *
+ * Consumer: filterAutoRoutableModels in autonomous model selection.
+ *
+ * @param {unknown} unitType - Unit type; nullish reads as "", others via String().
+ * @returns {boolean} True for research-milestone, research-slice,
+ *   discuss-milestone, triage, and any type prefixed "triage-" or "hook/".
+ */
+export function allowsFreeTierAutoRoute(unitType) {
+  const type = String(unitType ?? "");
+  return (
+    type === "research-milestone" ||
+    type === "research-slice" ||
+    type === "discuss-milestone" ||
+    type === "triage" ||
+    type.startsWith("triage-") ||
+    type.startsWith("hook/")
+  );
+}
+
+/**
+ * Remove free-tier routes from autonomous auto-selection unless the operator
+ * explicitly opts in.
+ *
+ * Purpose: prevent SF from silently choosing routes such as
+ * openrouter/qwen/qwen3-coder:free as the main autonomous worker for production
+ * repo changes.
+ *
+ * Consumer: dynamic routing candidate assembly in selectAndApplyModel.
+ *
+ * @param {object[]} models - Candidate models, each read via .provider and .id.
+ *   A non-array value is returned untouched, so this filter cannot throw where
+ *   the plain assignment it replaces did not.
+ * @param {object | null | undefined} routingConfig - Routing settings; only a
+ *   strict `allow_free_models === true` opts in to free-tier routes.
+ * @param {unknown} unitType - Unit type, checked with allowsFreeTierAutoRoute.
+ * @returns {object[]} `models` itself when free routes are allowed, otherwise
+ *   a new array without free-tier entries.
+ */
+export function filterAutoRoutableModels(models, routingConfig, unitType) {
+  if (!Array.isArray(models)) return models;
+  if (routingConfig?.allow_free_models === true) return models;
+  if (allowsFreeTierAutoRoute(unitType)) return models;
+  return models.filter(
+    (model) => !isFreeTierModelRoute(model?.provider, model?.id),
+  );
+}
 /**
  * Thrown when the model-policy gate rejects every candidate model for a unit
  * dispatch (#4959 / #4681 / #4850). The auto-loop catches this specifically
@@ -438,7 +511,9 @@ export async function selectAndApplyModel(
   }
   let effectiveModelConfig = modelConfig;
   let routingTierLabel = "";
-  let routingEligibleModels = availableModels;
+  let routingEligibleModels = isAutoMode
+    ? filterAutoRoutableModels(availableModels, routingConfig, unitType)
+    : availableModels;
   const taskMetadataForPolicy =
     unitType === "execute-task"
       ? extractTaskMetadata(unitId, basePath)
diff --git a/src/resources/extensions/sf/auto-timers.js b/src/resources/extensions/sf/auto-timers.js
index c63a613fe..b186a6bb1 100644
--- a/src/resources/extensions/sf/auto-timers.js
+++ b/src/resources/extensions/sf/auto-timers.js
@@ -8,6 +8,7 @@
 import { saveActivityLog } from "./activity-log.js";
 import { resolveAgentEndCancelled } from "./auto/resolve.js";
 import { detectWorkingTreeActivity } from "./auto-supervisor.js";
+import { blockModel } from "./blocked-models.js";
 import { recoverTimedOutUnit } from "./auto-timeout-recovery.js";
 import {
   clearInFlightTools,
@@ -270,6 +271,33 @@ export function startUnitSupervision(sctx) {
       }
       if (decision.action === "fail") {
         if (getInFlightToolCount() > 0) return;
+        const failedModel = s.currentUnitModel;
+        if (
+          decision.reason === "zero-progress" &&
+          failedModel?.provider &&
+          failedModel?.id
+        ) {
+          try {
+            blockModel(
+              s.basePath,
+              failedModel.provider,
+              failedModel.id,
+              `zero-progress on ${unitType} ${unitId}`,
+              { expiresAt: Date.now() + 60 * 60 * 1000 }, // one hour
+            );
+            ctx.ui.notify(
+              `Temporarily blocked ${failedModel.provider}/${failedModel.id} after zero-progress on ${unitType} ${unitId}; retry will choose a fallback.`,
+              "warning",
+            );
+          } catch (err) {
+            // Blocking is best-effort: a failure here must not skip the
+            // closeout below, which ran unconditionally before this change.
+            ctx.ui.notify(
+              `Could not block ${failedModel.provider}/${failedModel.id}: ${err instanceof Error ? err.message : String(err)}`,
+              "warning",
+            );
+          }
+        }
         await closeoutUnit(
           ctx,
           s.basePath,
diff --git a/src/resources/extensions/sf/self-feedback-drain.js b/src/resources/extensions/sf/self-feedback-drain.js
index 5785b720a..b14da308a 100644
--- a/src/resources/extensions/sf/self-feedback-drain.js
+++ b/src/resources/extensions/sf/self-feedback-drain.js
@@ -267,6 +267,31 @@ export function buildInlineFixPrompt(entries) {
     "When every entry has a decision, say: Self-feedback triage complete.",
   ].join("\n");
 }
+
+/**
+ * Return true when the current surface cannot reliably consume interactive
+ * follow-up turns.
+ *
+ * Purpose: web/RPC and headless autonomous runs need self-feedback repair to
+ * leave the main worker alone; injecting a hidden follow-up prompt into those
+ * surfaces can steal the next autonomous unit and trip the zero-progress guard.
+ *
+ * Consumer: dispatchSelfFeedbackInlineFixIfNeeded when choosing between a
+ * headless triage subprocess and an interactive `pi.sendMessage` follow-up.
+ *
+ * @param {object} [env=process.env] - Environment map to inspect; injectable
+ *   so tests need not mutate process.env.
+ * @returns {boolean} True when SF_HEADLESS, SF_WEB_BRIDGE_TUI, or
+ *   SF_WEB_AUTO_START_AUTONOMOUS is exactly "1".
+ */
+export function isMachineSelfFeedbackSurface(env = process.env) {
+  return (
+    env.SF_HEADLESS === "1" ||
+    env.SF_WEB_BRIDGE_TUI === "1" ||
+    env.SF_WEB_AUTO_START_AUTONOMOUS === "1"
+  );
+}
+
 /**
  * Dispatch a focused inline-fix turn for unresolved high/critical self-feedback.
  *
@@ -312,7 +337,7 @@ export function dispatchSelfFeedbackInlineFixIfNeeded(basePath, ctx, pi) {
   // pipeline through SF's own subprocess machinery (router-resolved model,
   // watchdog, trust gate). Fire-and-forget: the autonomous loop will see the
   // resolved entries via DB on the next iteration's gate check.
-  if (process.env.SF_HEADLESS === "1") {
+  if (isMachineSelfFeedbackSurface()) {
     ctx.ui.notify(
       `Dispatching self-feedback inline fix via 'sf headless triage --apply' for ${ids.length} high/critical entr${ids.length === 1 ? "y" : "ies"} (headless surface).`,
       "warning",
diff --git a/src/resources/extensions/sf/tests/enabled-models-fallback.test.mjs b/src/resources/extensions/sf/tests/enabled-models-fallback.test.mjs
index 436f769d2..b21d9f3ae 100644
--- a/src/resources/extensions/sf/tests/enabled-models-fallback.test.mjs
+++ b/src/resources/extensions/sf/tests/enabled-models-fallback.test.mjs
@@ -20,7 +20,12 @@ import { afterEach, describe, test } from "vitest";
 
 import { isModelInEnabledList } from "../preferences-models.js";
 import "../preferences.js";
-import { selectAndApplyModel } from "../auto-model-selection.js";
+import {
+  allowsFreeTierAutoRoute,
+  filterAutoRoutableModels,
+  isFreeTierModelRoute,
+  selectAndApplyModel,
+} from "../auto-model-selection.js";
 
 // ── Test environment setup ───────────────────────────────────────────────────
 
@@ -232,6 +237,70 @@ describe("isModelInEnabledList", () => {
   });
 });
 
+describe("free-tier autonomous routing policy", () => {
+  const PAID = makeCandidate("kimi-coding", "kimi-k2.6");
+  const FREE = makeCandidate("openrouter", "qwen/qwen3-coder:free");
+
+  test("main_worker_units_exclude_free_tier_routes_by_default", () => {
+    const filtered = filterAutoRoutableModels(
+      [FREE, PAID],
+      { allow_free_models: false },
+      "execute-task",
+    );
+
+    assert.deepEqual(
+      filtered.map((m) => `${m.provider}/${m.id}`),
+      ["kimi-coding/kimi-k2.6"],
+    );
+  });
+
+  test("secondary_units_may_use_free_tier_routes", () => {
+    assert.equal(allowsFreeTierAutoRoute("research-slice"), true);
+    assert.equal(allowsFreeTierAutoRoute("triage"), true);
+    assert.equal(allowsFreeTierAutoRoute("execute-task"), false);
+
+    const filtered = filterAutoRoutableModels(
+      [FREE, PAID],
+      {},
+      "research-slice",
+    );
+
+    assert.deepEqual(
+      filtered.map((m) => `${m.provider}/${m.id}`),
+      ["openrouter/qwen/qwen3-coder:free", "kimi-coding/kimi-k2.6"],
+    );
+  });
+
+  test("operator_opt_in_keeps_free_tier_routes_for_main_worker_units", () => {
+    const filtered = filterAutoRoutableModels(
+      [FREE, PAID],
+      { allow_free_models: true },
+      "execute-task",
+    );
+
+    assert.deepEqual(
+      filtered.map((m) => `${m.provider}/${m.id}`),
+      ["openrouter/qwen/qwen3-coder:free", "kimi-coding/kimi-k2.6"],
+    );
+  });
+
+  test("non_array_candidate_pool_passes_through_unchanged", () => {
+    assert.equal(
+      filterAutoRoutableModels(undefined, {}, "execute-task"),
+      undefined,
+    );
+  });
+
+  test("free_marker_must_be_a_whole_segment", () => {
+    assert.equal(
+      isFreeTierModelRoute("openrouter", "qwen/qwen3-coder:free"),
+      true,
+    );
+    assert.equal(isFreeTierModelRoute("acme", "freedom-7b"), false);
+    assert.equal(isFreeTierModelRoute(undefined, undefined), false);
+  });
+});
+
 // ── Part 2: fallback chain respects enabledModels ─────────────────────────────
 //
 // preferences.yaml pins execution chain to:
diff --git a/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs b/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs
index 9af57f07c..85d833e5e 100644
--- a/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs
+++ b/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs
@@ -5,6 +5,7 @@ import { afterEach, describe, expect, test } from "vitest";
 import {
   buildInlineFixPrompt,
   filterTriageCandidatesByProviderPolicy,
+  isMachineSelfFeedbackSurface,
   rankTriageModelsViaRouter,
   runTriage,
   selectInlineFixCandidates,
@@ -361,6 +362,28 @@ describe("buildInlineFixPrompt", () => {
   });
 });
 
+describe("isMachineSelfFeedbackSurface", () => {
+  test("treats web RPC bridge as machine surface even before SF_HEADLESS is set", () => {
+    expect(
+      isMachineSelfFeedbackSurface({
+        SF_WEB_BRIDGE_TUI: "1",
+      }),
+    ).toBe(true);
+  });
+
+  test("treats server auto-start as machine surface", () => {
+    expect(
+      isMachineSelfFeedbackSurface({
+        SF_WEB_AUTO_START_AUTONOMOUS: "1",
+      }),
+    ).toBe(true);
+  });
+
+  test("keeps ordinary interactive sessions on follow-up path", () => {
+    expect(isMachineSelfFeedbackSurface({})).toBe(false);
+  });
+});
+
 describe("runTriage (dependency-injected)", () => {
   test("returns ok+content on success and flags clean-finish from terminator", async () => {
     const fakeMessage = {