From 5f92320c7dbf7594d5013008e2c38da94b7f344e Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Fri, 15 May 2026 13:46:12 +0200 Subject: [PATCH] fix(auto): timeout silent swarm turns despite heartbeats --- .../extensions/sf/auto-model-selection.js | 102 +++++++++++++++--- .../extensions/sf/preferences-models.js | 30 ++++++ .../sf/tests/preferences-models.test.mjs | 34 +++++- .../sf/tests/swarm-dispatch-and-wait.test.mjs | 39 +++++++ .../extensions/sf/uok/swarm-dispatch.js | 98 ++++++++++++++++- 5 files changed, 289 insertions(+), 14 deletions(-) diff --git a/src/resources/extensions/sf/auto-model-selection.js b/src/resources/extensions/sf/auto-model-selection.js index 799d63f07..7e44239eb 100644 --- a/src/resources/extensions/sf/auto-model-selection.js +++ b/src/resources/extensions/sf/auto-model-selection.js @@ -3,6 +3,9 @@ * Handles complexity-based routing, model resolution across providers, * and fallback chains. */ +import { readFileSync } from "node:fs"; +import { homedir } from "node:os"; +import { join } from "node:path"; import { unitPhaseLabel } from "./auto-dashboard.js"; import { isModelBlocked } from "./blocked-models.js"; import { @@ -10,6 +13,7 @@ import { extractTaskMetadata, tierLabel, } from "./complexity-classifier.js"; +import { getErrorMessage } from "./error-utils.js"; import { getLedger, getProjectTotals } from "./metrics.js"; import { routesFor } from "./model-registry.js"; import { @@ -19,9 +23,9 @@ import { loadCapabilityOverrides, resolveModelForComplexity, } from "./model-router.js"; -import { readStickyModelForUnit } from "./slice-routing-cache.js"; import { filterModelsByProviderModelAllow, + isModelInEnabledList, isProviderAllowedByLists, isProviderAllowedForAdvisor, resolveDynamicRoutingConfig, @@ -29,6 +33,7 @@ import { resolvePersistModelChanges, } from "./preferences-models.js"; import { getSessionModelOverride } from "./session-model-override.js"; +import { readStickyModelForUnit } from "./slice-routing-cache.js"; import { resolveUokFlags } from "./uok/flags.js"; import { applyModelPolicyFilter } from "./uok/model-policy.js"; import { @@ -37,7 +42,43 @@ import { } from "./uok/model-role-policy.js"; import { logWarning } from "./workflow-logger.js"; import { getRequiredWorkflowToolsForAutoUnit } from "./workflow-tools.js"; -import { getErrorMessage } from "./error-utils.js"; + +/** + * Read the operator's enabledModels allowlist from ~/.sf/agent/settings.json. + * + * Returns undefined when: + * - the file is missing or unreadable (→ no constraint) + * - enabledModels is absent or empty (→ no constraint) + * + * IO errors are silently swallowed — a missing settings.json must never + * prevent autonomous dispatch. + * + * Set SF_BYPASS_ENABLED_MODELS=1 to disable the allowlist check entirely + * (emergency escape hatch when the allowlist gets misconfigured). + */ +function readEnabledModels() { + if (process.env.SF_BYPASS_ENABLED_MODELS === "1") return undefined; + try { + const settingsPath = join(homedir(), ".sf", "agent", "settings.json"); + const settings = JSON.parse(readFileSync(settingsPath, "utf-8")); + if (process.env.SF_DEBUG_ENABLED_MODELS === "1") { + process.stderr.write( + `[readEnabledModels] HOME=${process.env.HOME} homedir=${homedir()} path=${settingsPath} enabledModels=${JSON.stringify(settings?.enabledModels)}\n`, + ); + } + return Array.isArray(settings?.enabledModels) && + settings.enabledModels.length > 0 + ? settings.enabledModels + : undefined; + } catch (err) { + if (process.env.SF_DEBUG_ENABLED_MODELS === "1") { + process.stderr.write( + `[readEnabledModels] error: ${err?.message}\n`, + ); + } + return undefined; // settings missing or unreadable → no allowlist constraint + } +} /** * Thrown when the model-policy gate rejects every candidate model for a unit * dispatch (#4959 / #4681 / #4850). The auto-loop catches this specifically @@ -273,9 +314,7 @@ export function applyLineageDiverseFilter(candidates, workerModelId) { if (!workerModelId || String(workerModelId).length === 0) { return { filtered: candidates, fellBack: false }; } - const kept = candidates.filter( - (m) => !isSameRootVendor(m.id, workerModelId), - ); + const kept = candidates.filter((m) => !isSameRootVendor(m.id, workerModelId)); if (kept.length === 0) { return { filtered: candidates, fellBack: true }; } @@ -505,8 +544,7 @@ export async function selectAndApplyModel( // Build an allow-set of keys to enforce in the resolution loop. const allowedKeys = new Set( filtered.map( - (m) => - `${m.provider.toLowerCase()}/${m.id.toLowerCase()}`, + (m) => `${m.provider.toLowerCase()}/${m.id.toLowerCase()}`, ), ); lineageAllowedKeys = allowedKeys; @@ -664,11 +702,7 @@ export async function selectAndApplyModel( // on a sibling unit in this slice when its capability score is // within window of the winner. Cleared on executor refusal so a // failing model does not re-attach to the slice. - const stickyHint = readStickyModelForUnit( - basePath, - unitType, - unitId, - ); + const stickyHint = readStickyModelForUnit(basePath, unitType, unitId); routingResult = resolveModelForComplexity( classification, modelConfig, @@ -718,6 +752,14 @@ export async function selectAndApplyModel( effectiveModelConfig.primary, ...effectiveModelConfig.fallbacks, ]; + // ── enabledModels allowlist (operator gate) ────────────────────────── + // Read once per unit dispatch; undefined = no constraint (open). + // SF_BYPASS_ENABLED_MODELS=1 disables the check entirely — emergency + // escape hatch when the allowlist gets misconfigured and autonomous + // can't find a viable fallback. + const enabledModels = readEnabledModels(); + let enabledModelsFilteredCount = 0; + let enabledModelsResolvedCount = 0; // models that existed in the pool (with or without allowlist) let attemptedPolicyEligible = false; for (const modelId of modelsToTry) { const resolutionPool = uokFlags.modelPolicy @@ -733,6 +775,22 @@ export async function selectAndApplyModel( ctx.ui.notify(`Model ${modelId} not found, trying fallback.`, "info"); continue; } + enabledModelsResolvedCount++; + // ── Enforce operator enabledModels allowlist ────────────────────── + // Applied as the first model-level filter so disallowed providers + // never reach policy/routing checks. enabledModels is only set when + // the operator has an explicit allowlist; otherwise this is a no-op. + if ( + enabledModels !== undefined && + !isModelInEnabledList(model.provider, model.id, enabledModels) + ) { + ctx.ui.notify( + `Model ${model.provider}/${model.id} not in enabledModels allowlist; trying next fallback.`, + "info", + ); + enabledModelsFilteredCount++; + continue; + } // Enforce lineage-diverse-from-worker: skip candidates whose root vendor // matches the worker's. lineageAllowedKeys is populated only when the // constraint applies (adversary / reviewer with a known worker model id). @@ -873,6 +931,22 @@ export async function selectAndApplyModel( } } } + // ── enabledModels exhaustion error ─────────────────────────────────── + // When every resolved candidate was filtered out by the enabledModels + // allowlist (and no model was applied), surface a clear operator- + // actionable error rather than silently falling back to the session default. + if ( + appliedModel === null && + enabledModels !== undefined && + enabledModelsResolvedCount > 0 && + enabledModelsFilteredCount === enabledModelsResolvedCount + ) { + throw new Error( + `All fallback candidates filtered by enabledModels allowlist. ` + + `Either add a provider/model to enabledModels in ~/.sf/agent/settings.json ` + + `or unset enabledModels. Set SF_BYPASS_ENABLED_MODELS=1 to disable the check.`, + ); + } if ( uokFlags.modelPolicy && policyAllowedModelKeys && @@ -909,6 +983,7 @@ export async function selectAndApplyModel( } else if (autoModeStartModel) { // No model preference for this unit type — re-apply the model captured // at autonomous mode start to prevent bleed from shared global settings.json (#650). + const startEnabledModels = readEnabledModels(); const availableModels = filterModelsByProviderModelAllow( ctx.modelRegistry .getAvailable() @@ -918,6 +993,9 @@ export async function selectAndApplyModel( prefs?.allowed_providers, prefs?.blocked_providers, ), + ) + .filter((m) => + isModelInEnabledList(m.provider, m.id, startEnabledModels), ), prefs?.provider_model_allow, prefs?.provider_model_block, diff --git a/src/resources/extensions/sf/preferences-models.js b/src/resources/extensions/sf/preferences-models.js index 7e9ba782d..0a40fae55 100644 --- a/src/resources/extensions/sf/preferences-models.js +++ b/src/resources/extensions/sf/preferences-models.js @@ -195,6 +195,36 @@ export function filterModelsByProviderModelAllow( ), ); } +/** + * Check whether a provider/modelId combination is in the operator's + * enabledModels allowlist from ~/.sf/agent/settings.json. + * + * Returns true when: + * - enabledModels is undefined/empty (no filter — all allowed) + * - any pattern matches "/" + * + * Pattern syntax: "provider/*" (all models in a provider) or + * "provider/specific-model" (exact). Wildcards only at the end of the + * pattern; intentional to keep policy simple and auditable. + * + * @param {string} provider - e.g. "mistral" + * @param {string} modelId - e.g. "mistral-large-2512" + * @param {string[]|undefined} enabledModels + * @returns {boolean} + */ +export function isModelInEnabledList(provider, modelId, enabledModels) { + if (!Array.isArray(enabledModels) || enabledModels.length === 0) return true; + const candidate = `${provider}/${modelId}`; + for (const pattern of enabledModels) { + if (typeof pattern !== "string") continue; + if (pattern === candidate) return true; + if (pattern.endsWith("/*")) { + const prefix = pattern.slice(0, -1); // e.g. "mistral/" + if (candidate.startsWith(prefix)) return true; + } + } + return false; +} /** * Check if a provider is in the allowed list and not in the blocked list. */ diff --git a/src/resources/extensions/sf/tests/preferences-models.test.mjs b/src/resources/extensions/sf/tests/preferences-models.test.mjs index 63e36c837..ec0808155 100644 --- a/src/resources/extensions/sf/tests/preferences-models.test.mjs +++ b/src/resources/extensions/sf/tests/preferences-models.test.mjs @@ -5,7 +5,10 @@ import { join } from "node:path"; import { afterEach, describe, test } from "vitest"; // Import preferences.js so that _initPrefsLoader is called and the circular dep lazy-loader is wired up. import "../preferences.js"; -import { resolveModelWithFallbacksForUnit } from "../preferences-models.js"; +import { + isModelInEnabledList, + resolveModelWithFallbacksForUnit, +} from "../preferences-models.js"; const originalCwd = process.cwd(); const originalEnv = { ...process.env }; @@ -107,4 +110,33 @@ describe("preferences model resolution", () => { fallbacks: [], }); }); + + test("isModelInEnabledList_when_list_empty_allows_any_model", () => { + assert.equal(isModelInEnabledList("kimi-coding", "kimi-k2.6", []), true); + assert.equal( + isModelInEnabledList("kimi-coding", "kimi-k2.6", undefined), + true, + ); + }); + + test("isModelInEnabledList_when_exact_or_provider_wildcard_matches_returns_true", () => { + assert.equal( + isModelInEnabledList("kimi-coding", "kimi-k2.6", [ + "openai/gpt-5.2", + "kimi-coding/kimi-k2.6", + ]), + true, + ); + assert.equal( + isModelInEnabledList("kimi-coding", "kimi-k2.6", ["kimi-coding/*"]), + true, + ); + }); + + test("isModelInEnabledList_when_no_pattern_matches_returns_false", () => { + assert.equal( + isModelInEnabledList("kimi-coding", "kimi-k2.6", ["openai/*"]), + false, + ); + }); }); diff --git a/src/resources/extensions/sf/tests/swarm-dispatch-and-wait.test.mjs b/src/resources/extensions/sf/tests/swarm-dispatch-and-wait.test.mjs index d44a31bc9..f07e6c892 100644 --- a/src/resources/extensions/sf/tests/swarm-dispatch-and-wait.test.mjs +++ b/src/resources/extensions/sf/tests/swarm-dispatch-and-wait.test.mjs @@ -581,6 +581,45 @@ describe("SwarmDispatchLayer.dispatchAndWait — Round 7: executor config forwar expect(capturedOpts.noOutputTimeoutMs).toBe(45_000); }); + test("noOutputTimeoutMs aborts a silent turn even when runtime heartbeats continue", async () => { + vi.useFakeTimers(); + const { runAgentTurn } = await import("../uok/agent-runner.js"); + + runAgentTurn.mockImplementationOnce(async (_agent, opts = {}) => { + setTimeout(() => { + opts.onEvent?.({ type: "runtime_heartbeat" }); + }, 5); + setTimeout(() => { + opts.onEvent?.({ type: "runtime_heartbeat" }); + }, 9); + return new Promise(() => { + // Simulate setup/model code that ignores AbortSignal. The outer watchdog + // must still resolve dispatchAndWait instead of waiting for this promise. + }); + }); + + const root = makeProject(); + const layer = new SwarmDispatchLayer(root); + const pending = layer.dispatchAndWait( + { + unitId: "task-heartbeat-only-timeout", + unitType: "execute-task", + workMode: "build", + payload: "edit files", + priority: 5, + scope: "scope-heartbeat-only-timeout", + }, + { noOutputTimeoutMs: 10, timeoutMs: 1_000 }, + ); + + await vi.advanceTimersByTimeAsync(20); + const result = await pending; + vi.useRealTimers(); + + expect(result.reply).toBeNull(); + expect(result.error).toContain("produced no output for 10ms"); + }); + test("envelope without executorSystemPrompt does not forward systemPromptOverride", async () => { // Envelopes without the optional fields must not pass undefined opts to runAgentTurn. const { runAgentTurn } = await import("../uok/agent-runner.js"); diff --git a/src/resources/extensions/sf/uok/swarm-dispatch.js b/src/resources/extensions/sf/uok/swarm-dispatch.js index 6fddebf34..b8c6cc751 100644 --- a/src/resources/extensions/sf/uok/swarm-dispatch.js +++ b/src/resources/extensions/sf/uok/swarm-dispatch.js @@ -41,6 +41,102 @@ async function getRunAgentTurn() { return _runAgentTurnFn; } +function isMeaningfulAgentTurnEvent(event) { + if (String(event?.type) === "runtime_heartbeat") return false; + if (event?.type === "message_update") { + const streamEvent = event.assistantMessageEvent; + if (streamEvent?.type === "text_delta") { + return String(streamEvent.delta ?? "").length > 0; + } + return true; + } + return true; +} + +async function runAgentTurnWithOuterWatchdogs(runAgentTurn, agent, opts = {}) { + const { timeoutMs = 480_000, noOutputTimeoutMs, signal, onEvent } = opts; + const controller = new AbortController(); + let hardTimer = null; + let noOutputTimer = null; + let settled = false; + let resolveWatchdog = null; + const watchdogResult = new Promise((resolve) => { + resolveWatchdog = resolve; + }); + + function abort(reason) { + if (settled) return; + try { + controller.abort(reason); + } catch { + controller.abort(); + } + resolveWatchdog?.({ + turnsProcessed: 0, + response: null, + error: reason, + }); + } + + function cleanup() { + settled = true; + if (hardTimer) clearTimeout(hardTimer); + if (noOutputTimer) clearTimeout(noOutputTimer); + } + + function armNoOutputTimer() { + if (!noOutputTimeoutMs || noOutputTimeoutMs <= 0) return; + if (noOutputTimer) clearTimeout(noOutputTimer); + noOutputTimer = setTimeout(() => { + abort(`agent turn produced no output for ${noOutputTimeoutMs}ms`); + }, noOutputTimeoutMs); + } + + if (signal) { + if (signal.aborted) abort("agent turn cancelled"); + else + signal.addEventListener("abort", () => abort("agent turn cancelled"), { + once: true, + }); + } + if (timeoutMs > 0) { + hardTimer = setTimeout(() => { + abort(`agent turn timed out after ${timeoutMs}ms`); + }, timeoutMs); + } + armNoOutputTimer(); + + const turnResult = runAgentTurn(agent, { + ...opts, + signal: controller.signal, + onEvent: (event) => { + if (isMeaningfulAgentTurnEvent(event)) armNoOutputTimer(); + onEvent?.(event); + }, + }).catch((err) => { + if (controller.signal.aborted) { + return { + turnsProcessed: 0, + response: null, + error: + typeof controller.signal.reason === "string" + ? controller.signal.reason + : "agent turn aborted", + }; + } + throw err; + }); + + try { + const result = await Promise.race([turnResult, watchdogResult]); + cleanup(); + return result; + } catch (err) { + cleanup(); + throw err; + } +} + // Module-level cache keyed by `${basePath}:${swarmName}` const _cache = new Map(); @@ -334,7 +430,7 @@ export class SwarmDispatchLayer { const runAgentTurn = await getRunAgentTurn(); let turnResult; try { - turnResult = await runAgentTurn(agent, { + turnResult = await runAgentTurnWithOuterWatchdogs(runAgentTurn, agent, { timeoutMs, ...(noOutputTimeoutMs ? { noOutputTimeoutMs } : {}), signal,