fix(auto): timeout silent swarm turns despite heartbeats

2026-05-15 13:46:12 +02:00 · 2026-05-15 13:46:12 +02:00 · 5f92320c7d
commit 5f92320c7d
parent 85f6650852
5 changed files with 289 additions and 14 deletions
--- a/src/resources/extensions/sf/auto-model-selection.js
+++ b/src/resources/extensions/sf/auto-model-selection.js
@ -3,6 +3,9 @@
 * Handles complexity-based routing, model resolution across providers,
 * and fallback chains.
 */
 import { readFileSync } from "node:fs";
 import { homedir } from "node:os";
 import { join } from "node:path";
 import { unitPhaseLabel } from "./auto-dashboard.js";
 import { isModelBlocked } from "./blocked-models.js";
 import {
@ -10,6 +13,7 @@ import {
 	extractTaskMetadata,
 	tierLabel,
 } from "./complexity-classifier.js";
 import { getErrorMessage } from "./error-utils.js";
 import { getLedger, getProjectTotals } from "./metrics.js";
 import { routesFor } from "./model-registry.js";
 import {
@ -19,9 +23,9 @@ import {
 	loadCapabilityOverrides,
 	resolveModelForComplexity,
 } from "./model-router.js";
 import { readStickyModelForUnit } from "./slice-routing-cache.js";
 import {
 	filterModelsByProviderModelAllow,
 	isModelInEnabledList,
 	isProviderAllowedByLists,
 	isProviderAllowedForAdvisor,
 	resolveDynamicRoutingConfig,
@ -29,6 +33,7 @@ import {
 	resolvePersistModelChanges,
 } from "./preferences-models.js";
 import { getSessionModelOverride } from "./session-model-override.js";
 import { readStickyModelForUnit } from "./slice-routing-cache.js";
 import { resolveUokFlags } from "./uok/flags.js";
 import { applyModelPolicyFilter } from "./uok/model-policy.js";
 import {
@ -37,7 +42,43 @@ import {
 } from "./uok/model-role-policy.js";
 import { logWarning } from "./workflow-logger.js";
 import { getRequiredWorkflowToolsForAutoUnit } from "./workflow-tools.js";
-import { getErrorMessage } from "./error-utils.js";
+
 /**
 * Read the operator's enabledModels allowlist from ~/.sf/agent/settings.json.
 *
 * Returns undefined when:
 *   - the file is missing or unreadable (→ no constraint)
 *   - enabledModels is absent or empty (→ no constraint)
 *
 * IO errors are silently swallowed — a missing settings.json must never
 * prevent autonomous dispatch.
 *
 * Set SF_BYPASS_ENABLED_MODELS=1 to disable the allowlist check entirely
 * (emergency escape hatch when the allowlist gets misconfigured).
 */
 function readEnabledModels() {
 	if (process.env.SF_BYPASS_ENABLED_MODELS === "1") return undefined;
 	try {
 		const settingsPath = join(homedir(), ".sf", "agent", "settings.json");
 		const settings = JSON.parse(readFileSync(settingsPath, "utf-8"));
 		if (process.env.SF_DEBUG_ENABLED_MODELS === "1") {
 			process.stderr.write(
 				`[readEnabledModels] HOME=${process.env.HOME} homedir=${homedir()} path=${settingsPath} enabledModels=${JSON.stringify(settings?.enabledModels)}\n`,
 			);
 		}
 		return Array.isArray(settings?.enabledModels) &&
 			settings.enabledModels.length > 0
 			? settings.enabledModels
 			: undefined;
 	} catch (err) {
 		if (process.env.SF_DEBUG_ENABLED_MODELS === "1") {
 			process.stderr.write(
 				`[readEnabledModels] error: ${err?.message}\n`,
 			);
 		}
 		return undefined; // settings missing or unreadable → no allowlist constraint
 	}
 }
 /**
 * Thrown when the model-policy gate rejects every candidate model for a unit
 * dispatch (#4959 / #4681 / #4850).  The auto-loop catches this specifically
@ -273,9 +314,7 @@ export function applyLineageDiverseFilter(candidates, workerModelId) {
 	if (!workerModelId || String(workerModelId).length === 0) {
 		return { filtered: candidates, fellBack: false };
 	}
-	const kept = candidates.filter(
+	const kept = candidates.filter((m) => !isSameRootVendor(m.id, workerModelId));
 		(m) => !isSameRootVendor(m.id, workerModelId),
 	);
 	if (kept.length === 0) {
 		return { filtered: candidates, fellBack: true };
 	}
@ -505,8 +544,7 @@ export async function selectAndApplyModel(
 							// Build an allow-set of keys to enforce in the resolution loop.
 							const allowedKeys = new Set(
 								filtered.map(
-									(m) =>
+									(m) => `${m.provider.toLowerCase()}/${m.id.toLowerCase()}`,
 										`${m.provider.toLowerCase()}/${m.id.toLowerCase()}`,
 								),
 							);
 							lineageAllowedKeys = allowedKeys;
@ -664,11 +702,7 @@ export async function selectAndApplyModel(
 					// on a sibling unit in this slice when its capability score is
 					// within window of the winner. Cleared on executor refusal so a
 					// failing model does not re-attach to the slice.
-					const stickyHint = readStickyModelForUnit(
+					const stickyHint = readStickyModelForUnit(basePath, unitType, unitId);
 						basePath,
 						unitType,
 						unitId,
 					);
 					routingResult = resolveModelForComplexity(
 						classification,
 						modelConfig,
@ -718,6 +752,14 @@ export async function selectAndApplyModel(
 			effectiveModelConfig.primary,
 			...effectiveModelConfig.fallbacks,
 		];
 		// ── enabledModels allowlist (operator gate) ──────────────────────────
 		// Read once per unit dispatch; undefined = no constraint (open).
 		// SF_BYPASS_ENABLED_MODELS=1 disables the check entirely — emergency
 		// escape hatch when the allowlist gets misconfigured and autonomous
 		// can't find a viable fallback.
 		const enabledModels = readEnabledModels();
 		let enabledModelsFilteredCount = 0;
 		let enabledModelsResolvedCount = 0; // models that existed in the pool (with or without allowlist)
 		let attemptedPolicyEligible = false;
 		for (const modelId of modelsToTry) {
 			const resolutionPool = uokFlags.modelPolicy
@ -733,6 +775,22 @@ export async function selectAndApplyModel(
 					ctx.ui.notify(`Model ${modelId} not found, trying fallback.`, "info");
 				continue;
 			}
 			enabledModelsResolvedCount++;
 			// ── Enforce operator enabledModels allowlist ──────────────────────
 			// Applied as the first model-level filter so disallowed providers
 			// never reach policy/routing checks. enabledModels is only set when
 			// the operator has an explicit allowlist; otherwise this is a no-op.
 			if (
 				enabledModels !== undefined &&
 				!isModelInEnabledList(model.provider, model.id, enabledModels)
 			) {
 				ctx.ui.notify(
 					`Model ${model.provider}/${model.id} not in enabledModels allowlist; trying next fallback.`,
 					"info",
 				);
 				enabledModelsFilteredCount++;
 				continue;
 			}
 			// Enforce lineage-diverse-from-worker: skip candidates whose root vendor
 			// matches the worker's.  lineageAllowedKeys is populated only when the
 			// constraint applies (adversary / reviewer with a known worker model id).
@ -873,6 +931,22 @@ export async function selectAndApplyModel(
 				}
 			}
 		}
 		// ── enabledModels exhaustion error ───────────────────────────────────
 		// When every resolved candidate was filtered out by the enabledModels
 		// allowlist (and no model was applied), surface a clear operator-
 		// actionable error rather than silently falling back to the session default.
 		if (
 			appliedModel === null &&
 			enabledModels !== undefined &&
 			enabledModelsResolvedCount > 0 &&
 			enabledModelsFilteredCount === enabledModelsResolvedCount
 		) {
 			throw new Error(
 				`All fallback candidates filtered by enabledModels allowlist. ` +
 					`Either add a provider/model to enabledModels in ~/.sf/agent/settings.json ` +
 					`or unset enabledModels. Set SF_BYPASS_ENABLED_MODELS=1 to disable the check.`,
 			);
 		}
 		if (
 			uokFlags.modelPolicy &&
 			policyAllowedModelKeys &&
@ -909,6 +983,7 @@ export async function selectAndApplyModel(
 	} else if (autoModeStartModel) {
 		// No model preference for this unit type — re-apply the model captured
 		// at autonomous mode start to prevent bleed from shared global settings.json (#650).
 		const startEnabledModels = readEnabledModels();
 		const availableModels = filterModelsByProviderModelAllow(
 			ctx.modelRegistry
 				.getAvailable()
@ -918,6 +993,9 @@ export async function selectAndApplyModel(
 						prefs?.allowed_providers,
 						prefs?.blocked_providers,
 					),
 				)
 				.filter((m) =>
 					isModelInEnabledList(m.provider, m.id, startEnabledModels),
 				),
 			prefs?.provider_model_allow,
 			prefs?.provider_model_block,
--- a/src/resources/extensions/sf/preferences-models.js
+++ b/src/resources/extensions/sf/preferences-models.js
@ -195,6 +195,36 @@ export function filterModelsByProviderModelAllow(
 		),
 	);
 }
 /**
 * Check whether a provider/modelId combination is in the operator's
 * enabledModels allowlist from ~/.sf/agent/settings.json.
 *
 * Returns true when:
 *   - enabledModels is undefined/empty (no filter — all allowed)
 *   - any pattern matches "<provider>/<modelId>"
 *
 * Pattern syntax: "provider/*" (all models in a provider) or
 * "provider/specific-model" (exact). Wildcards only at the end of the
 * pattern; intentional to keep policy simple and auditable.
 *
 * @param {string} provider - e.g. "mistral"
 * @param {string} modelId  - e.g. "mistral-large-2512"
 * @param {string[]|undefined} enabledModels
 * @returns {boolean}
 */
 export function isModelInEnabledList(provider, modelId, enabledModels) {
 	if (!Array.isArray(enabledModels) || enabledModels.length === 0) return true;
 	const candidate = `${provider}/${modelId}`;
 	for (const pattern of enabledModels) {
 		if (typeof pattern !== "string") continue;
 		if (pattern === candidate) return true;
 		if (pattern.endsWith("/*")) {
 			const prefix = pattern.slice(0, -1); // e.g. "mistral/"
 			if (candidate.startsWith(prefix)) return true;
 		}
 	}
 	return false;
 }
 /**
 * Check if a provider is in the allowed list and not in the blocked list.
 */
--- a/src/resources/extensions/sf/tests/preferences-models.test.mjs
+++ b/src/resources/extensions/sf/tests/preferences-models.test.mjs
@ -5,7 +5,10 @@ import { join } from "node:path";
 import { afterEach, describe, test } from "vitest";
 // Import preferences.js so that _initPrefsLoader is called and the circular dep lazy-loader is wired up.
 import "../preferences.js";
-import { resolveModelWithFallbacksForUnit } from "../preferences-models.js";
+import {
 	isModelInEnabledList,
 	resolveModelWithFallbacksForUnit,
 } from "../preferences-models.js";
 const originalCwd = process.cwd();
 const originalEnv = { ...process.env };
@ -107,4 +110,33 @@ describe("preferences model resolution", () => {
 			fallbacks: [],
 		});
 	});
 	test("isModelInEnabledList_when_list_empty_allows_any_model", () => {
 		assert.equal(isModelInEnabledList("kimi-coding", "kimi-k2.6", []), true);
 		assert.equal(
 			isModelInEnabledList("kimi-coding", "kimi-k2.6", undefined),
 			true,
 		);
 	});
 	test("isModelInEnabledList_when_exact_or_provider_wildcard_matches_returns_true", () => {
 		assert.equal(
 			isModelInEnabledList("kimi-coding", "kimi-k2.6", [
 				"openai/gpt-5.2",
 				"kimi-coding/kimi-k2.6",
 			]),
 			true,
 		);
 		assert.equal(
 			isModelInEnabledList("kimi-coding", "kimi-k2.6", ["kimi-coding/*"]),
 			true,
 		);
 	});
 	test("isModelInEnabledList_when_no_pattern_matches_returns_false", () => {
 		assert.equal(
 			isModelInEnabledList("kimi-coding", "kimi-k2.6", ["openai/*"]),
 			false,
 		);
 	});
 });
--- a/src/resources/extensions/sf/tests/swarm-dispatch-and-wait.test.mjs
+++ b/src/resources/extensions/sf/tests/swarm-dispatch-and-wait.test.mjs
@ -581,6 +581,45 @@ describe("SwarmDispatchLayer.dispatchAndWait — Round 7: executor config forwar
 		expect(capturedOpts.noOutputTimeoutMs).toBe(45_000);
 	});
 	test("noOutputTimeoutMs aborts a silent turn even when runtime heartbeats continue", async () => {
 		vi.useFakeTimers();
 		const { runAgentTurn } = await import("../uok/agent-runner.js");
 		runAgentTurn.mockImplementationOnce(async (_agent, opts = {}) => {
 			setTimeout(() => {
 				opts.onEvent?.({ type: "runtime_heartbeat" });
 			}, 5);
 			setTimeout(() => {
 				opts.onEvent?.({ type: "runtime_heartbeat" });
 			}, 9);
 			return new Promise(() => {
 				// Simulate setup/model code that ignores AbortSignal. The outer watchdog
 				// must still resolve dispatchAndWait instead of waiting for this promise.
 			});
 		});
 		const root = makeProject();
 		const layer = new SwarmDispatchLayer(root);
 		const pending = layer.dispatchAndWait(
 			{
 				unitId: "task-heartbeat-only-timeout",
 				unitType: "execute-task",
 				workMode: "build",
 				payload: "edit files",
 				priority: 5,
 				scope: "scope-heartbeat-only-timeout",
 			},
 			{ noOutputTimeoutMs: 10, timeoutMs: 1_000 },
 		);
 		await vi.advanceTimersByTimeAsync(20);
 		const result = await pending;
 		vi.useRealTimers();
 		expect(result.reply).toBeNull();
 		expect(result.error).toContain("produced no output for 10ms");
 	});
 	test("envelope without executorSystemPrompt does not forward systemPromptOverride", async () => {
 		// Envelopes without the optional fields must not pass undefined opts to runAgentTurn.
 		const { runAgentTurn } = await import("../uok/agent-runner.js");
--- a/src/resources/extensions/sf/uok/swarm-dispatch.js
+++ b/src/resources/extensions/sf/uok/swarm-dispatch.js
@ -41,6 +41,102 @@ async function getRunAgentTurn() {
 	return _runAgentTurnFn;
 }
 function isMeaningfulAgentTurnEvent(event) {
 	if (String(event?.type) === "runtime_heartbeat") return false;
 	if (event?.type === "message_update") {
 		const streamEvent = event.assistantMessageEvent;
 		if (streamEvent?.type === "text_delta") {
 			return String(streamEvent.delta ?? "").length > 0;
 		}
 		return true;
 	}
 	return true;
 }
 async function runAgentTurnWithOuterWatchdogs(runAgentTurn, agent, opts = {}) {
 	const { timeoutMs = 480_000, noOutputTimeoutMs, signal, onEvent } = opts;
 	const controller = new AbortController();
 	let hardTimer = null;
 	let noOutputTimer = null;
 	let settled = false;
 	let resolveWatchdog = null;
 	const watchdogResult = new Promise((resolve) => {
 		resolveWatchdog = resolve;
 	});
 	function abort(reason) {
 		if (settled) return;
 		try {
 			controller.abort(reason);
 		} catch {
 			controller.abort();
 		}
 		resolveWatchdog?.({
 			turnsProcessed: 0,
 			response: null,
 			error: reason,
 		});
 	}
 	function cleanup() {
 		settled = true;
 		if (hardTimer) clearTimeout(hardTimer);
 		if (noOutputTimer) clearTimeout(noOutputTimer);
 	}
 	function armNoOutputTimer() {
 		if (!noOutputTimeoutMs || noOutputTimeoutMs <= 0) return;
 		if (noOutputTimer) clearTimeout(noOutputTimer);
 		noOutputTimer = setTimeout(() => {
 			abort(`agent turn produced no output for ${noOutputTimeoutMs}ms`);
 		}, noOutputTimeoutMs);
 	}
 	if (signal) {
 		if (signal.aborted) abort("agent turn cancelled");
 		else
 			signal.addEventListener("abort", () => abort("agent turn cancelled"), {
 				once: true,
 			});
 	}
 	if (timeoutMs > 0) {
 		hardTimer = setTimeout(() => {
 			abort(`agent turn timed out after ${timeoutMs}ms`);
 		}, timeoutMs);
 	}
 	armNoOutputTimer();
 	const turnResult = runAgentTurn(agent, {
 		...opts,
 		signal: controller.signal,
 		onEvent: (event) => {
 			if (isMeaningfulAgentTurnEvent(event)) armNoOutputTimer();
 			onEvent?.(event);
 		},
 	}).catch((err) => {
 		if (controller.signal.aborted) {
 			return {
 				turnsProcessed: 0,
 				response: null,
 				error:
 					typeof controller.signal.reason === "string"
 						? controller.signal.reason
 						: "agent turn aborted",
 			};
 		}
 		throw err;
 	});
 	try {
 		const result = await Promise.race([turnResult, watchdogResult]);
 		cleanup();
 		return result;
 	} catch (err) {
 		cleanup();
 		throw err;
 	}
 }
 // Module-level cache keyed by `${basePath}:${swarmName}`
 const _cache = new Map();
@ -334,7 +430,7 @@ export class SwarmDispatchLayer {
 		const runAgentTurn = await getRunAgentTurn();
 		let turnResult;
 		try {
-			turnResult = await runAgentTurn(agent, {
+			turnResult = await runAgentTurnWithOuterWatchdogs(runAgentTurn, agent, {
 				timeoutMs,
 				...(noOutputTimeoutMs ? { noOutputTimeoutMs } : {}),
 				signal,