fix(auto): keep solver checkpoint pass out of swarm

This commit is contained in:
Mikael Hugo 2026-05-15 13:35:20 +02:00
parent bd3fbda9cb
commit 85f6650852
4 changed files with 79 additions and 15 deletions

View file

@ -1116,7 +1116,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
solverAssessment.repairAttempt,
solverAssessment.maxRepairAttempts,
),
{ keepSession: true },
{ keepSession: true, activeToolsAllowlist: ["checkpoint"] },
);
s.lastUnitAgentEndMessages = currentUnitResult.event?.messages ?? null;
if (currentUnitResult.status === "cancelled") {

View file

@ -16,6 +16,7 @@ import {
import { scopeActiveToolsForUnitType } from "../constants.js";
import { debugLog } from "../debug-logger.js";
import { getErrorMessage } from "../error-utils.js";
import { emitJournalEvent } from "../journal.js";
import {
resolveAutoSupervisorConfig,
resolvePersistModelChanges,
@ -26,7 +27,6 @@ import {
countChangedFiles,
resetRunawayGuardState,
} from "../uok/auto-runaway-guard.js";
import { emitJournalEvent } from "../journal.js";
import { swarmDispatchAndWait } from "../uok/swarm-dispatch.js";
import { logWarning } from "../workflow-logger.js";
import {
@ -162,6 +162,37 @@ function legacyPermissionLevelForProfile(profile) {
}
}
/**
* Return true when a runUnit call is the protocol-only solver checkpoint pass.
*
* Purpose: keep autonomous executor dispatch and solver checkpoint classification
* on separate execution paths. Headless mode defaults executor units through the
* swarm layer, but the checkpoint-only solver pass must stay in the parent session
* where `activeToolsAllowlist: ["checkpoint"]` is enforced directly; routing it
* back through swarm turns the protocol repair into another executor job and can
* re-enter the missing-checkpoint loop.
*
* Consumer: runUnit's swarm routing gate.
*/
function isCheckpointOnlyProtocolPass(options) {
const tools = options?.activeToolsAllowlist;
return (
Array.isArray(tools) && tools.length === 1 && tools[0] === "checkpoint"
);
}
function shouldRouteRunUnitViaSwarm(options) {
if (isCheckpointOnlyProtocolPass(options)) return false;
const swarmFlag = process.env.SF_AUTONOMOUS_VIA_SWARM;
return (
swarmFlag === "1" ||
swarmFlag === "true" ||
(swarmFlag !== "0" &&
swarmFlag !== "false" &&
process.env.SF_HEADLESS === "1")
);
}
/**
* Build the system prompt for a swarm worker executing an autonomous unit.
*
@ -773,14 +804,7 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
*/
export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
// Feature-flagged swarm path — default on in headless mode, opt-in elsewhere.
const swarmFlag = process.env.SF_AUTONOMOUS_VIA_SWARM;
const useSwarm =
swarmFlag === "1" ||
swarmFlag === "true" ||
(swarmFlag !== "0" &&
swarmFlag !== "false" &&
process.env.SF_HEADLESS === "1");
if (useSwarm) {
if (shouldRouteRunUnitViaSwarm(options)) {
return runUnitViaSwarm(ctx, pi, s, unitType, unitId, prompt, options);
}

View file

@ -15,6 +15,7 @@ import {
_resetNotificationStore,
appendNotification,
getAppendFailureCount,
getLastAppendFailure,
initNotificationStore,
NOTICE_KIND,
} from "../notification-store.js";
@ -46,7 +47,7 @@ function readNotifications() {
return content.split("\n").map((line) => JSON.parse(line));
}
test("HaltWatchdog.check_when_idle_exceeds_threshold_emits_blocking_notice", () => {
test("HaltWatchdog.check()_when_idle_exceeds_threshold_emits_BLOCKING_NOTICE_to_jsonl", () => {
vi.useFakeTimers();
const thresholdMs = 10;
const watchdog = new HaltWatchdog(testDir, thresholdMs);
@ -67,7 +68,7 @@ test("HaltWatchdog.check_when_idle_exceeds_threshold_emits_blocking_notice", ()
assert.match(entries[0].message, /idle for/);
});
test("HaltWatchdog.check_when_within_threshold_emits_no_notification", () => {
test("HaltWatchdog.check()_when_within_threshold_emits_no_notification", () => {
vi.useFakeTimers();
const thresholdMs = 10_000;
const watchdog = new HaltWatchdog(testDir, thresholdMs);
@ -84,7 +85,7 @@ test("HaltWatchdog.check_when_within_threshold_emits_no_notification", () => {
assert.deepEqual(readNotifications(), []);
});
test("HaltWatchdog.check_reports_only_once_per_idle_period", () => {
test("HaltWatchdog.check()_reports_only_once_per_idle_period", () => {
vi.useFakeTimers();
const thresholdMs = 10;
const watchdog = new HaltWatchdog(testDir, thresholdMs);
@ -100,10 +101,16 @@ test("HaltWatchdog.check_reports_only_once_per_idle_period", () => {
assert.equal(readNotifications().length, 1);
});
test("appendNotification_when_store_not_initialized_fails_open", () => {
test("appendNotification_fails_open_when_basePath_is_null", () => {
_resetNotificationStore();
// basePath is null because initNotificationStore was never called after reset
assert.doesNotThrow(() => {
appendNotification("test", "info", "test");
});
// When basePath is null, appendNotification returns early without error.
// The fail-open contract is: no crash, no unhandled exception.
assert.equal(getAppendFailureCount(), 0);
assert.equal(getLastAppendFailure(), null);
});

View file

@ -133,13 +133,16 @@ function makeS(basePath = "/tmp/test-project") {
let origEnv;
let origNoOutputEnv;
let origHeadlessEnv;
beforeEach(() => {
vi.clearAllMocks();
origEnv = process.env.SF_AUTONOMOUS_VIA_SWARM;
origNoOutputEnv = process.env.SF_SWARM_NO_OUTPUT_TIMEOUT_MS;
origHeadlessEnv = process.env.SF_HEADLESS;
delete process.env.SF_AUTONOMOUS_VIA_SWARM;
delete process.env.SF_SWARM_NO_OUTPUT_TIMEOUT_MS;
delete process.env.SF_HEADLESS;
// Default implementation for the happy-path tests: return a deterministic reply.
mockSwarmDispatchAndWait.mockImplementation(
@ -165,6 +168,11 @@ afterEach(() => {
} else {
process.env.SF_SWARM_NO_OUTPUT_TIMEOUT_MS = origNoOutputEnv;
}
if (origHeadlessEnv === undefined) {
delete process.env.SF_HEADLESS;
} else {
process.env.SF_HEADLESS = origHeadlessEnv;
}
});
// ─── Flag ON — happy path ─────────────────────────────────────────────────────
@ -694,6 +702,31 @@ describe("runUnit — SF_AUTONOMOUS_VIA_SWARM unset — default path", () => {
expect(mockSwarmDispatchAndWait).not.toHaveBeenCalled();
expect(result.status).toBe("cancelled");
});
test("does NOT route checkpoint-only solver pass through swarm in headless mode", async () => {
process.env.SF_HEADLESS = "1";
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
const ctx = makeCtx("/tmp");
const pi = makePi();
const s = makeS("/tmp");
s.cmdCtx.newSession = vi.fn(async () => {
throw new Error("session-failed");
});
const result = await runUnit(
ctx,
pi,
s,
"execute-task",
"u-solver-pass",
"emit checkpoint only",
{ activeToolsAllowlist: ["checkpoint"] },
);
expect(mockSwarmDispatchAndWait).not.toHaveBeenCalled();
expect(result.status).toBe("cancelled");
});
});
// ─── workMode derivation unit tests ──────────────────────────────────────────