fix(auto): timeout silent swarm turns despite heartbeats

This commit is contained in:
Mikael Hugo 2026-05-15 13:46:12 +02:00
parent 85f6650852
commit 5f92320c7d
5 changed files with 289 additions and 14 deletions

View file

@ -3,6 +3,9 @@
* Handles complexity-based routing, model resolution across providers, * Handles complexity-based routing, model resolution across providers,
* and fallback chains. * and fallback chains.
*/ */
import { readFileSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { unitPhaseLabel } from "./auto-dashboard.js"; import { unitPhaseLabel } from "./auto-dashboard.js";
import { isModelBlocked } from "./blocked-models.js"; import { isModelBlocked } from "./blocked-models.js";
import { import {
@ -10,6 +13,7 @@ import {
extractTaskMetadata, extractTaskMetadata,
tierLabel, tierLabel,
} from "./complexity-classifier.js"; } from "./complexity-classifier.js";
import { getErrorMessage } from "./error-utils.js";
import { getLedger, getProjectTotals } from "./metrics.js"; import { getLedger, getProjectTotals } from "./metrics.js";
import { routesFor } from "./model-registry.js"; import { routesFor } from "./model-registry.js";
import { import {
@ -19,9 +23,9 @@ import {
loadCapabilityOverrides, loadCapabilityOverrides,
resolveModelForComplexity, resolveModelForComplexity,
} from "./model-router.js"; } from "./model-router.js";
import { readStickyModelForUnit } from "./slice-routing-cache.js";
import { import {
filterModelsByProviderModelAllow, filterModelsByProviderModelAllow,
isModelInEnabledList,
isProviderAllowedByLists, isProviderAllowedByLists,
isProviderAllowedForAdvisor, isProviderAllowedForAdvisor,
resolveDynamicRoutingConfig, resolveDynamicRoutingConfig,
@ -29,6 +33,7 @@ import {
resolvePersistModelChanges, resolvePersistModelChanges,
} from "./preferences-models.js"; } from "./preferences-models.js";
import { getSessionModelOverride } from "./session-model-override.js"; import { getSessionModelOverride } from "./session-model-override.js";
import { readStickyModelForUnit } from "./slice-routing-cache.js";
import { resolveUokFlags } from "./uok/flags.js"; import { resolveUokFlags } from "./uok/flags.js";
import { applyModelPolicyFilter } from "./uok/model-policy.js"; import { applyModelPolicyFilter } from "./uok/model-policy.js";
import { import {
@ -37,7 +42,43 @@ import {
} from "./uok/model-role-policy.js"; } from "./uok/model-role-policy.js";
import { logWarning } from "./workflow-logger.js"; import { logWarning } from "./workflow-logger.js";
import { getRequiredWorkflowToolsForAutoUnit } from "./workflow-tools.js"; import { getRequiredWorkflowToolsForAutoUnit } from "./workflow-tools.js";
import { getErrorMessage } from "./error-utils.js";
/**
* Read the operator's enabledModels allowlist from ~/.sf/agent/settings.json.
*
* Returns undefined when:
* - the file is missing or unreadable ( no constraint)
* - enabledModels is absent or empty ( no constraint)
*
* IO errors are silently swallowed a missing settings.json must never
* prevent autonomous dispatch.
*
* Set SF_BYPASS_ENABLED_MODELS=1 to disable the allowlist check entirely
* (emergency escape hatch when the allowlist gets misconfigured).
*/
function readEnabledModels() {
if (process.env.SF_BYPASS_ENABLED_MODELS === "1") return undefined;
try {
const settingsPath = join(homedir(), ".sf", "agent", "settings.json");
const settings = JSON.parse(readFileSync(settingsPath, "utf-8"));
if (process.env.SF_DEBUG_ENABLED_MODELS === "1") {
process.stderr.write(
`[readEnabledModels] HOME=${process.env.HOME} homedir=${homedir()} path=${settingsPath} enabledModels=${JSON.stringify(settings?.enabledModels)}\n`,
);
}
return Array.isArray(settings?.enabledModels) &&
settings.enabledModels.length > 0
? settings.enabledModels
: undefined;
} catch (err) {
if (process.env.SF_DEBUG_ENABLED_MODELS === "1") {
process.stderr.write(
`[readEnabledModels] error: ${err?.message}\n`,
);
}
return undefined; // settings missing or unreadable → no allowlist constraint
}
}
/** /**
* Thrown when the model-policy gate rejects every candidate model for a unit * Thrown when the model-policy gate rejects every candidate model for a unit
* dispatch (#4959 / #4681 / #4850). The auto-loop catches this specifically * dispatch (#4959 / #4681 / #4850). The auto-loop catches this specifically
@ -273,9 +314,7 @@ export function applyLineageDiverseFilter(candidates, workerModelId) {
if (!workerModelId || String(workerModelId).length === 0) { if (!workerModelId || String(workerModelId).length === 0) {
return { filtered: candidates, fellBack: false }; return { filtered: candidates, fellBack: false };
} }
const kept = candidates.filter( const kept = candidates.filter((m) => !isSameRootVendor(m.id, workerModelId));
(m) => !isSameRootVendor(m.id, workerModelId),
);
if (kept.length === 0) { if (kept.length === 0) {
return { filtered: candidates, fellBack: true }; return { filtered: candidates, fellBack: true };
} }
@ -505,8 +544,7 @@ export async function selectAndApplyModel(
// Build an allow-set of keys to enforce in the resolution loop. // Build an allow-set of keys to enforce in the resolution loop.
const allowedKeys = new Set( const allowedKeys = new Set(
filtered.map( filtered.map(
(m) => (m) => `${m.provider.toLowerCase()}/${m.id.toLowerCase()}`,
`${m.provider.toLowerCase()}/${m.id.toLowerCase()}`,
), ),
); );
lineageAllowedKeys = allowedKeys; lineageAllowedKeys = allowedKeys;
@ -664,11 +702,7 @@ export async function selectAndApplyModel(
// on a sibling unit in this slice when its capability score is // on a sibling unit in this slice when its capability score is
// within window of the winner. Cleared on executor refusal so a // within window of the winner. Cleared on executor refusal so a
// failing model does not re-attach to the slice. // failing model does not re-attach to the slice.
const stickyHint = readStickyModelForUnit( const stickyHint = readStickyModelForUnit(basePath, unitType, unitId);
basePath,
unitType,
unitId,
);
routingResult = resolveModelForComplexity( routingResult = resolveModelForComplexity(
classification, classification,
modelConfig, modelConfig,
@ -718,6 +752,14 @@ export async function selectAndApplyModel(
effectiveModelConfig.primary, effectiveModelConfig.primary,
...effectiveModelConfig.fallbacks, ...effectiveModelConfig.fallbacks,
]; ];
// ── enabledModels allowlist (operator gate) ──────────────────────────
// Read once per unit dispatch; undefined = no constraint (open).
// SF_BYPASS_ENABLED_MODELS=1 disables the check entirely — emergency
// escape hatch when the allowlist gets misconfigured and autonomous
// can't find a viable fallback.
const enabledModels = readEnabledModels();
let enabledModelsFilteredCount = 0;
let enabledModelsResolvedCount = 0; // models that existed in the pool (with or without allowlist)
let attemptedPolicyEligible = false; let attemptedPolicyEligible = false;
for (const modelId of modelsToTry) { for (const modelId of modelsToTry) {
const resolutionPool = uokFlags.modelPolicy const resolutionPool = uokFlags.modelPolicy
@ -733,6 +775,22 @@ export async function selectAndApplyModel(
ctx.ui.notify(`Model ${modelId} not found, trying fallback.`, "info"); ctx.ui.notify(`Model ${modelId} not found, trying fallback.`, "info");
continue; continue;
} }
enabledModelsResolvedCount++;
// ── Enforce operator enabledModels allowlist ──────────────────────
// Applied as the first model-level filter so disallowed providers
// never reach policy/routing checks. enabledModels is only set when
// the operator has an explicit allowlist; otherwise this is a no-op.
if (
enabledModels !== undefined &&
!isModelInEnabledList(model.provider, model.id, enabledModels)
) {
ctx.ui.notify(
`Model ${model.provider}/${model.id} not in enabledModels allowlist; trying next fallback.`,
"info",
);
enabledModelsFilteredCount++;
continue;
}
// Enforce lineage-diverse-from-worker: skip candidates whose root vendor // Enforce lineage-diverse-from-worker: skip candidates whose root vendor
// matches the worker's. lineageAllowedKeys is populated only when the // matches the worker's. lineageAllowedKeys is populated only when the
// constraint applies (adversary / reviewer with a known worker model id). // constraint applies (adversary / reviewer with a known worker model id).
@ -873,6 +931,22 @@ export async function selectAndApplyModel(
} }
} }
} }
// ── enabledModels exhaustion error ───────────────────────────────────
// When every resolved candidate was filtered out by the enabledModels
// allowlist (and no model was applied), surface a clear operator-
// actionable error rather than silently falling back to the session default.
if (
appliedModel === null &&
enabledModels !== undefined &&
enabledModelsResolvedCount > 0 &&
enabledModelsFilteredCount === enabledModelsResolvedCount
) {
throw new Error(
`All fallback candidates filtered by enabledModels allowlist. ` +
`Either add a provider/model to enabledModels in ~/.sf/agent/settings.json ` +
`or unset enabledModels. Set SF_BYPASS_ENABLED_MODELS=1 to disable the check.`,
);
}
if ( if (
uokFlags.modelPolicy && uokFlags.modelPolicy &&
policyAllowedModelKeys && policyAllowedModelKeys &&
@ -909,6 +983,7 @@ export async function selectAndApplyModel(
} else if (autoModeStartModel) { } else if (autoModeStartModel) {
// No model preference for this unit type — re-apply the model captured // No model preference for this unit type — re-apply the model captured
// at autonomous mode start to prevent bleed from shared global settings.json (#650). // at autonomous mode start to prevent bleed from shared global settings.json (#650).
const startEnabledModels = readEnabledModels();
const availableModels = filterModelsByProviderModelAllow( const availableModels = filterModelsByProviderModelAllow(
ctx.modelRegistry ctx.modelRegistry
.getAvailable() .getAvailable()
@ -918,6 +993,9 @@ export async function selectAndApplyModel(
prefs?.allowed_providers, prefs?.allowed_providers,
prefs?.blocked_providers, prefs?.blocked_providers,
), ),
)
.filter((m) =>
isModelInEnabledList(m.provider, m.id, startEnabledModels),
), ),
prefs?.provider_model_allow, prefs?.provider_model_allow,
prefs?.provider_model_block, prefs?.provider_model_block,

View file

@ -195,6 +195,36 @@ export function filterModelsByProviderModelAllow(
), ),
); );
} }
/**
* Check whether a provider/modelId combination is in the operator's
* enabledModels allowlist from ~/.sf/agent/settings.json.
*
* Returns true when:
* - enabledModels is undefined/empty (no filter all allowed)
* - any pattern matches "<provider>/<modelId>"
*
* Pattern syntax: "provider/*" (all models in a provider) or
* "provider/specific-model" (exact). Wildcards only at the end of the
* pattern; intentional to keep policy simple and auditable.
*
* @param {string} provider - e.g. "mistral"
* @param {string} modelId - e.g. "mistral-large-2512"
* @param {string[]|undefined} enabledModels
* @returns {boolean}
*/
export function isModelInEnabledList(provider, modelId, enabledModels) {
if (!Array.isArray(enabledModels) || enabledModels.length === 0) return true;
const candidate = `${provider}/${modelId}`;
for (const pattern of enabledModels) {
if (typeof pattern !== "string") continue;
if (pattern === candidate) return true;
if (pattern.endsWith("/*")) {
const prefix = pattern.slice(0, -1); // e.g. "mistral/"
if (candidate.startsWith(prefix)) return true;
}
}
return false;
}
/** /**
* Check if a provider is in the allowed list and not in the blocked list. * Check if a provider is in the allowed list and not in the blocked list.
*/ */

View file

@ -5,7 +5,10 @@ import { join } from "node:path";
import { afterEach, describe, test } from "vitest"; import { afterEach, describe, test } from "vitest";
// Import preferences.js so that _initPrefsLoader is called and the circular dep lazy-loader is wired up. // Import preferences.js so that _initPrefsLoader is called and the circular dep lazy-loader is wired up.
import "../preferences.js"; import "../preferences.js";
import { resolveModelWithFallbacksForUnit } from "../preferences-models.js"; import {
isModelInEnabledList,
resolveModelWithFallbacksForUnit,
} from "../preferences-models.js";
const originalCwd = process.cwd(); const originalCwd = process.cwd();
const originalEnv = { ...process.env }; const originalEnv = { ...process.env };
@ -107,4 +110,33 @@ describe("preferences model resolution", () => {
fallbacks: [], fallbacks: [],
}); });
}); });
test("isModelInEnabledList_when_list_empty_allows_any_model", () => {
assert.equal(isModelInEnabledList("kimi-coding", "kimi-k2.6", []), true);
assert.equal(
isModelInEnabledList("kimi-coding", "kimi-k2.6", undefined),
true,
);
});
test("isModelInEnabledList_when_exact_or_provider_wildcard_matches_returns_true", () => {
assert.equal(
isModelInEnabledList("kimi-coding", "kimi-k2.6", [
"openai/gpt-5.2",
"kimi-coding/kimi-k2.6",
]),
true,
);
assert.equal(
isModelInEnabledList("kimi-coding", "kimi-k2.6", ["kimi-coding/*"]),
true,
);
});
test("isModelInEnabledList_when_no_pattern_matches_returns_false", () => {
assert.equal(
isModelInEnabledList("kimi-coding", "kimi-k2.6", ["openai/*"]),
false,
);
});
}); });

View file

@ -581,6 +581,45 @@ describe("SwarmDispatchLayer.dispatchAndWait — Round 7: executor config forwar
expect(capturedOpts.noOutputTimeoutMs).toBe(45_000); expect(capturedOpts.noOutputTimeoutMs).toBe(45_000);
}); });
test("noOutputTimeoutMs aborts a silent turn even when runtime heartbeats continue", async () => {
vi.useFakeTimers();
const { runAgentTurn } = await import("../uok/agent-runner.js");
runAgentTurn.mockImplementationOnce(async (_agent, opts = {}) => {
setTimeout(() => {
opts.onEvent?.({ type: "runtime_heartbeat" });
}, 5);
setTimeout(() => {
opts.onEvent?.({ type: "runtime_heartbeat" });
}, 9);
return new Promise(() => {
// Simulate setup/model code that ignores AbortSignal. The outer watchdog
// must still resolve dispatchAndWait instead of waiting for this promise.
});
});
const root = makeProject();
const layer = new SwarmDispatchLayer(root);
const pending = layer.dispatchAndWait(
{
unitId: "task-heartbeat-only-timeout",
unitType: "execute-task",
workMode: "build",
payload: "edit files",
priority: 5,
scope: "scope-heartbeat-only-timeout",
},
{ noOutputTimeoutMs: 10, timeoutMs: 1_000 },
);
await vi.advanceTimersByTimeAsync(20);
const result = await pending;
vi.useRealTimers();
expect(result.reply).toBeNull();
expect(result.error).toContain("produced no output for 10ms");
});
test("envelope without executorSystemPrompt does not forward systemPromptOverride", async () => { test("envelope without executorSystemPrompt does not forward systemPromptOverride", async () => {
// Envelopes without the optional fields must not pass undefined opts to runAgentTurn. // Envelopes without the optional fields must not pass undefined opts to runAgentTurn.
const { runAgentTurn } = await import("../uok/agent-runner.js"); const { runAgentTurn } = await import("../uok/agent-runner.js");

View file

@ -41,6 +41,102 @@ async function getRunAgentTurn() {
return _runAgentTurnFn; return _runAgentTurnFn;
} }
function isMeaningfulAgentTurnEvent(event) {
if (String(event?.type) === "runtime_heartbeat") return false;
if (event?.type === "message_update") {
const streamEvent = event.assistantMessageEvent;
if (streamEvent?.type === "text_delta") {
return String(streamEvent.delta ?? "").length > 0;
}
return true;
}
return true;
}
async function runAgentTurnWithOuterWatchdogs(runAgentTurn, agent, opts = {}) {
const { timeoutMs = 480_000, noOutputTimeoutMs, signal, onEvent } = opts;
const controller = new AbortController();
let hardTimer = null;
let noOutputTimer = null;
let settled = false;
let resolveWatchdog = null;
const watchdogResult = new Promise((resolve) => {
resolveWatchdog = resolve;
});
function abort(reason) {
if (settled) return;
try {
controller.abort(reason);
} catch {
controller.abort();
}
resolveWatchdog?.({
turnsProcessed: 0,
response: null,
error: reason,
});
}
function cleanup() {
settled = true;
if (hardTimer) clearTimeout(hardTimer);
if (noOutputTimer) clearTimeout(noOutputTimer);
}
function armNoOutputTimer() {
if (!noOutputTimeoutMs || noOutputTimeoutMs <= 0) return;
if (noOutputTimer) clearTimeout(noOutputTimer);
noOutputTimer = setTimeout(() => {
abort(`agent turn produced no output for ${noOutputTimeoutMs}ms`);
}, noOutputTimeoutMs);
}
if (signal) {
if (signal.aborted) abort("agent turn cancelled");
else
signal.addEventListener("abort", () => abort("agent turn cancelled"), {
once: true,
});
}
if (timeoutMs > 0) {
hardTimer = setTimeout(() => {
abort(`agent turn timed out after ${timeoutMs}ms`);
}, timeoutMs);
}
armNoOutputTimer();
const turnResult = runAgentTurn(agent, {
...opts,
signal: controller.signal,
onEvent: (event) => {
if (isMeaningfulAgentTurnEvent(event)) armNoOutputTimer();
onEvent?.(event);
},
}).catch((err) => {
if (controller.signal.aborted) {
return {
turnsProcessed: 0,
response: null,
error:
typeof controller.signal.reason === "string"
? controller.signal.reason
: "agent turn aborted",
};
}
throw err;
});
try {
const result = await Promise.race([turnResult, watchdogResult]);
cleanup();
return result;
} catch (err) {
cleanup();
throw err;
}
}
// Module-level cache keyed by `${basePath}:${swarmName}` // Module-level cache keyed by `${basePath}:${swarmName}`
const _cache = new Map(); const _cache = new Map();
@ -334,7 +430,7 @@ export class SwarmDispatchLayer {
const runAgentTurn = await getRunAgentTurn(); const runAgentTurn = await getRunAgentTurn();
let turnResult; let turnResult;
try { try {
turnResult = await runAgentTurn(agent, { turnResult = await runAgentTurnWithOuterWatchdogs(runAgentTurn, agent, {
timeoutMs, timeoutMs,
...(noOutputTimeoutMs ? { noOutputTimeoutMs } : {}), ...(noOutputTimeoutMs ? { noOutputTimeoutMs } : {}),
signal, signal,