chore: auto-commit after challenge

SF-Unit: M048/S03/challenge
This commit is contained in:
Mikael Hugo 2026-05-17 20:16:24 +02:00
parent cf2d1a768e
commit 8f097f8dca
5 changed files with 159 additions and 3 deletions

View file

@ -69,6 +69,59 @@ function readEnabledModels() {
return undefined; // settings missing or unreadable → no allowlist constraint
}
}
/**
 * Detect whether a provider/model pair names a best-effort ("free") route.
 *
 * The provider and model id are joined as `provider/modelId`, lower-cased, and
 * searched for the standalone token `free`: it must sit at the start or end of
 * the text or be bounded by one of `-`, `_`, `:`, `/`, or whitespace. Words
 * that merely contain "free" (e.g. `freedom`, `carefree`) do not match.
 *
 * Purpose: free SKUs are handy for experiments but offer weaker availability
 * and tool-loop guarantees, so autonomous coding work should stay on reliable
 * paid/subscription routes by default.
 *
 * Consumer: filterAutoRoutableModels, which drops matching candidates.
 *
 * @param {string | null | undefined} provider - Provider name; nullish is treated as "".
 * @param {string | null | undefined} modelId - Model identifier; nullish is treated as "".
 * @returns {boolean} True when the combined route carries a standalone `free` token.
 */
export function isFreeTierModelRoute(provider, modelId) {
  const routeText = `${provider ?? ""}/${modelId ?? ""}`;
  const freeToken = /(^|[-_:/\s])free($|[-_:/\s])/i;
  return freeToken.test(routeText.toLowerCase());
}
/**
 * Decide whether a unit type is permitted to run on best-effort/free-tier
 * routes.
 *
 * Permitted unit types are the exact names `research-milestone`,
 * `research-slice`, `discuss-milestone`, and `triage`, plus anything prefixed
 * with `triage-` or `hook/`. Every other value, including nullish input, is
 * rejected.
 *
 * Purpose: let supporting work use cheap models while repo-changing worker and
 * verification units stay on reliable routes.
 *
 * Consumer: filterAutoRoutableModels in autonomous model selection.
 *
 * @param {unknown} unitType - Unit type label; coerced to a string (nullish becomes "").
 * @returns {boolean} True when free-tier routes are acceptable for this unit.
 */
export function allowsFreeTierAutoRoute(unitType) {
  const kind = String(unitType ?? "");
  switch (kind) {
    case "research-milestone":
    case "research-slice":
    case "discuss-milestone":
    case "triage":
      return true;
    default:
      // Whole families of unit types are admitted by prefix.
      return kind.startsWith("triage-") || kind.startsWith("hook/");
  }
}
/**
 * Strip free-tier routes from an auto-selection candidate list unless they are
 * explicitly allowed.
 *
 * The input list is returned untouched (same array reference) when either
 * `routingConfig.allow_free_models` is exactly `true` or the unit type is one
 * that allowsFreeTierAutoRoute accepts. Otherwise a new array is returned
 * containing only the models that isFreeTierModelRoute does not flag.
 *
 * Purpose: stop SF from silently picking routes such as
 * openrouter/qwen/qwen3-coder:free as the main autonomous worker for
 * production repo changes.
 *
 * Consumer: dynamic routing candidate assembly in selectAndApplyModel.
 *
 * @param {Array<{provider: string, id: string}>} models - Candidate models.
 * @param {{allow_free_models?: boolean} | null | undefined} routingConfig - Routing settings; only `allow_free_models` is read here.
 * @param {unknown} unitType - Unit type being dispatched.
 * @returns {Array<{provider: string, id: string}>} The permitted candidates.
 */
export function filterAutoRoutableModels(models, routingConfig, unitType) {
  const operatorOptedIn = routingConfig?.allow_free_models === true;
  if (operatorOptedIn || allowsFreeTierAutoRoute(unitType)) {
    return models;
  }
  const isReliableRoute = ({ provider, id }) =>
    !isFreeTierModelRoute(provider, id);
  return models.filter(isReliableRoute);
}
/**
* Thrown when the model-policy gate rejects every candidate model for a unit
* dispatch (#4959 / #4681 / #4850). The auto-loop catches this specifically
@ -438,7 +491,9 @@ export async function selectAndApplyModel(
}
let effectiveModelConfig = modelConfig;
let routingTierLabel = "";
let routingEligibleModels = availableModels;
let routingEligibleModels = isAutoMode
? filterAutoRoutableModels(availableModels, routingConfig, unitType)
: availableModels;
const taskMetadataForPolicy =
unitType === "execute-task"
? extractTaskMetadata(unitId, basePath)

View file

@ -8,6 +8,7 @@
import { saveActivityLog } from "./activity-log.js";
import { resolveAgentEndCancelled } from "./auto/resolve.js";
import { detectWorkingTreeActivity } from "./auto-supervisor.js";
import { blockModel } from "./blocked-models.js";
import { recoverTimedOutUnit } from "./auto-timeout-recovery.js";
import {
clearInFlightTools,
@ -270,6 +271,24 @@ export function startUnitSupervision(sctx) {
}
if (decision.action === "fail") {
if (getInFlightToolCount() > 0) return;
const failedModel = s.currentUnitModel;
if (
decision.reason === "zero-progress" &&
failedModel?.provider &&
failedModel?.id
) {
blockModel(
s.basePath,
failedModel.provider,
failedModel.id,
`zero-progress on ${unitType} ${unitId}`,
{ expiresAt: Date.now() + 60 * 60 * 1000 },
);
ctx.ui.notify(
`Temporarily blocked ${failedModel.provider}/${failedModel.id} after zero-progress on ${unitType} ${unitId}; retry will choose a fallback.`,
"warning",
);
}
await closeoutUnit(
ctx,
s.basePath,

View file

@ -267,6 +267,26 @@ export function buildInlineFixPrompt(entries) {
"When every entry has a decision, say: Self-feedback triage complete.",
].join("\n");
}
/**
 * Report whether the current surface is a machine-driven one that cannot
 * reliably consume interactive follow-up turns.
 *
 * A surface counts as machine-driven when any of the environment flags
 * `SF_HEADLESS`, `SF_WEB_BRIDGE_TUI`, or `SF_WEB_AUTO_START_AUTONOMOUS` is set
 * to the exact string "1".
 *
 * Purpose: on web/RPC and headless autonomous runs, self-feedback repair must
 * leave the main worker alone; a hidden follow-up prompt injected into those
 * surfaces can steal the next autonomous unit and trip the zero-progress guard.
 *
 * Consumer: dispatchSelfFeedbackInlineFixIfNeeded, when choosing between a
 * headless triage subprocess and an interactive `pi.sendMessage` follow-up.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment map to inspect.
 * @returns {boolean} True when at least one machine-surface flag equals "1".
 */
export function isMachineSelfFeedbackSurface(env = process.env) {
  const machineSurfaceFlags = [
    "SF_HEADLESS",
    "SF_WEB_BRIDGE_TUI",
    "SF_WEB_AUTO_START_AUTONOMOUS",
  ];
  return machineSurfaceFlags.some((flag) => env[flag] === "1");
}
/**
* Dispatch a focused inline-fix turn for unresolved high/critical self-feedback.
*
@ -312,7 +332,7 @@ export function dispatchSelfFeedbackInlineFixIfNeeded(basePath, ctx, pi) {
// pipeline through SF's own subprocess machinery (router-resolved model,
// watchdog, trust gate). Fire-and-forget: the autonomous loop will see the
// resolved entries via DB on the next iteration's gate check.
if (process.env.SF_HEADLESS === "1") {
if (isMachineSelfFeedbackSurface()) {
ctx.ui.notify(
`Dispatching self-feedback inline fix via 'sf headless triage --apply' for ${ids.length} high/critical entr${ids.length === 1 ? "y" : "ies"} (headless surface).`,
"warning",

View file

@ -20,7 +20,11 @@ import { afterEach, describe, test } from "vitest";
import { isModelInEnabledList } from "../preferences-models.js";
import "../preferences.js";
import { selectAndApplyModel } from "../auto-model-selection.js";
import {
allowsFreeTierAutoRoute,
filterAutoRoutableModels,
selectAndApplyModel,
} from "../auto-model-selection.js";
// ── Test environment setup ───────────────────────────────────────────────────
@ -232,6 +236,41 @@ describe("isModelInEnabledList", () => {
});
});
// Policy suite: free-tier routes are filtered out for main worker units and
// kept for secondary (research/triage) units.
describe("free-tier autonomous routing policy", () => {
  const paidCandidate = makeCandidate("kimi-coding", "kimi-k2.6");
  const freeCandidate = makeCandidate("openrouter", "qwen/qwen3-coder:free");
  // Render a candidate as "provider/id" so expectations stay compact.
  const toRoute = (m) => `${m.provider}/${m.id}`;

  test("main_worker_units_exclude_free_tier_routes_by_default", () => {
    const routes = filterAutoRoutableModels(
      [freeCandidate, paidCandidate],
      { allow_free_models: false },
      "execute-task",
    ).map((m) => toRoute(m));

    assert.deepEqual(routes, ["kimi-coding/kimi-k2.6"]);
  });

  test("secondary_units_may_use_free_tier_routes", () => {
    assert.equal(allowsFreeTierAutoRoute("research-slice"), true);
    assert.equal(allowsFreeTierAutoRoute("triage"), true);
    assert.equal(allowsFreeTierAutoRoute("execute-task"), false);

    // An empty routing config means no operator opt-in; the unit type alone
    // keeps the free route in the pool, in its original position.
    const routes = filterAutoRoutableModels(
      [freeCandidate, paidCandidate],
      {},
      "research-slice",
    ).map((m) => toRoute(m));

    assert.deepEqual(routes, [
      "openrouter/qwen/qwen3-coder:free",
      "kimi-coding/kimi-k2.6",
    ]);
  });
});
// ── Part 2: fallback chain respects enabledModels ─────────────────────────────
//
// preferences.yaml pins execution chain to:

View file

@ -5,6 +5,7 @@ import { afterEach, describe, expect, test } from "vitest";
import {
buildInlineFixPrompt,
filterTriageCandidatesByProviderPolicy,
isMachineSelfFeedbackSurface,
rankTriageModelsViaRouter,
runTriage,
selectInlineFixCandidates,
@ -361,6 +362,28 @@ describe("buildInlineFixPrompt", () => {
});
});
// Surface-detection suite: each machine flag on its own marks a machine
// surface; an empty environment stays interactive.
describe("isMachineSelfFeedbackSurface", () => {
  test("treats web RPC bridge as machine surface even before SF_HEADLESS is set", () => {
    const env = { SF_WEB_BRIDGE_TUI: "1" };
    expect(isMachineSelfFeedbackSurface(env)).toBe(true);
  });

  test("treats server auto-start as machine surface", () => {
    const env = { SF_WEB_AUTO_START_AUTONOMOUS: "1" };
    expect(isMachineSelfFeedbackSurface(env)).toBe(true);
  });

  test("keeps ordinary interactive sessions on follow-up path", () => {
    const env = {};
    expect(isMachineSelfFeedbackSurface(env)).toBe(false);
  });
});
describe("runTriage (dependency-injected)", () => {
test("returns ok+content on success and flags clean-finish from terminator", async () => {
const fakeMessage = {