From 85f66508522e37b90732af2fac96fb97c79886cd Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Fri, 15 May 2026 13:35:20 +0200 Subject: [PATCH] fix(auto): keep solver checkpoint pass out of swarm --- .../extensions/sf/auto/phases-unit.js | 2 +- src/resources/extensions/sf/auto/run-unit.js | 42 +++++++++++++++---- .../tests/auto-halt-watchdog-notify.test.mjs | 17 +++++--- .../sf/tests/run-unit-via-swarm.test.mjs | 33 +++++++++++++++ 4 files changed, 79 insertions(+), 15 deletions(-) diff --git a/src/resources/extensions/sf/auto/phases-unit.js b/src/resources/extensions/sf/auto/phases-unit.js index 32c06c54a..b30615ac1 100644 --- a/src/resources/extensions/sf/auto/phases-unit.js +++ b/src/resources/extensions/sf/auto/phases-unit.js @@ -1116,7 +1116,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { solverAssessment.repairAttempt, solverAssessment.maxRepairAttempts, ), - { keepSession: true }, + { keepSession: true, activeToolsAllowlist: ["checkpoint"] }, ); s.lastUnitAgentEndMessages = currentUnitResult.event?.messages ?? null; if (currentUnitResult.status === "cancelled") { diff --git a/src/resources/extensions/sf/auto/run-unit.js b/src/resources/extensions/sf/auto/run-unit.js index fcba4e957..b7de09273 100644 --- a/src/resources/extensions/sf/auto/run-unit.js +++ b/src/resources/extensions/sf/auto/run-unit.js @@ -16,6 +16,7 @@ import { import { scopeActiveToolsForUnitType } from "../constants.js"; import { debugLog } from "../debug-logger.js"; import { getErrorMessage } from "../error-utils.js"; +import { emitJournalEvent } from "../journal.js"; import { resolveAutoSupervisorConfig, resolvePersistModelChanges, @@ -26,7 +27,6 @@ import { countChangedFiles, resetRunawayGuardState, } from "../uok/auto-runaway-guard.js"; -import { emitJournalEvent } from "../journal.js"; import { swarmDispatchAndWait } from "../uok/swarm-dispatch.js"; import { logWarning } from "../workflow-logger.js"; import { @@ -162,6 +162,37 @@ function legacyPermissionLevelForProfile(profile) { } } +/** + * Return true when a runUnit call is the protocol-only solver checkpoint pass. + * + * Purpose: keep autonomous executor dispatch and solver checkpoint classification + * on separate execution paths. Headless mode defaults executor units through the + * swarm layer, but the checkpoint-only solver pass must stay in the parent session + * where `activeToolsAllowlist: ["checkpoint"]` is enforced directly; routing it + * back through swarm turns the protocol repair into another executor job and can + * re-enter the missing-checkpoint loop. + * + * Consumer: runUnit's swarm routing gate. + */ +function isCheckpointOnlyProtocolPass(options) { + const tools = options?.activeToolsAllowlist; + return ( + Array.isArray(tools) && tools.length === 1 && tools[0] === "checkpoint" + ); +} + +function shouldRouteRunUnitViaSwarm(options) { + if (isCheckpointOnlyProtocolPass(options)) return false; + const swarmFlag = process.env.SF_AUTONOMOUS_VIA_SWARM; + return ( + swarmFlag === "1" || + swarmFlag === "true" || + (swarmFlag !== "0" && + swarmFlag !== "false" && + process.env.SF_HEADLESS === "1") + ); +} + /** * Build the system prompt for a swarm worker executing an autonomous unit. * @@ -773,14 +804,7 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) { */ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) { // Feature-flagged swarm path — default on in headless mode, opt-in elsewhere. - const swarmFlag = process.env.SF_AUTONOMOUS_VIA_SWARM; - const useSwarm = - swarmFlag === "1" || - swarmFlag === "true" || - (swarmFlag !== "0" && - swarmFlag !== "false" && - process.env.SF_HEADLESS === "1"); - if (useSwarm) { + if (shouldRouteRunUnitViaSwarm(options)) { return runUnitViaSwarm(ctx, pi, s, unitType, unitId, prompt, options); } diff --git a/src/resources/extensions/sf/tests/auto-halt-watchdog-notify.test.mjs b/src/resources/extensions/sf/tests/auto-halt-watchdog-notify.test.mjs index d2c80adb9..51744035f 100644 --- a/src/resources/extensions/sf/tests/auto-halt-watchdog-notify.test.mjs +++ b/src/resources/extensions/sf/tests/auto-halt-watchdog-notify.test.mjs @@ -15,6 +15,7 @@ import { _resetNotificationStore, appendNotification, getAppendFailureCount, + getLastAppendFailure, initNotificationStore, NOTICE_KIND, } from "../notification-store.js"; @@ -46,7 +47,7 @@ function readNotifications() { return content.split("\n").map((line) => JSON.parse(line)); } -test("HaltWatchdog.check_when_idle_exceeds_threshold_emits_blocking_notice", () => { +test("HaltWatchdog.check()_when_idle_exceeds_threshold_emits_BLOCKING_NOTICE_to_jsonl", () => { vi.useFakeTimers(); const thresholdMs = 10; const watchdog = new HaltWatchdog(testDir, thresholdMs); @@ -67,7 +68,7 @@ test("HaltWatchdog.check_when_idle_exceeds_threshold_emits_blocking_notice", () assert.match(entries[0].message, /idle for/); }); -test("HaltWatchdog.check_when_within_threshold_emits_no_notification", () => { +test("HaltWatchdog.check()_when_within_threshold_emits_no_notification", () => { vi.useFakeTimers(); const thresholdMs = 10_000; const watchdog = new HaltWatchdog(testDir, thresholdMs); @@ -84,7 +85,7 @@ test("HaltWatchdog.check_when_within_threshold_emits_no_notification", () => { assert.deepEqual(readNotifications(), []); }); -test("HaltWatchdog.check_reports_only_once_per_idle_period", () => { +test("HaltWatchdog.check()_reports_only_once_per_idle_period", () => { vi.useFakeTimers(); const thresholdMs = 10; const watchdog = new HaltWatchdog(testDir, thresholdMs); @@ -100,10 +101,16 @@ test("HaltWatchdog.check_reports_only_once_per_idle_period", () => { assert.equal(readNotifications().length, 1); }); -test("appendNotification_when_store_not_initialized_fails_open", () => { +test("appendNotification_fails_open_when_basePath_is_null", () => { _resetNotificationStore(); + // basePath is null because initNotificationStore was never called after reset - appendNotification("test", "info", "test"); + assert.doesNotThrow(() => { + appendNotification("test", "info", "test"); + }); + // When basePath is null, appendNotification returns early without error. + // The fail-open contract is: no crash, no unhandled exception. assert.equal(getAppendFailureCount(), 0); + assert.equal(getLastAppendFailure(), null); }); diff --git a/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs b/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs index c191afb00..46a005d0a 100644 --- a/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs +++ b/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs @@ -133,13 +133,16 @@ function makeS(basePath = "/tmp/test-project") { let origEnv; let origNoOutputEnv; +let origHeadlessEnv; beforeEach(() => { vi.clearAllMocks(); origEnv = process.env.SF_AUTONOMOUS_VIA_SWARM; origNoOutputEnv = process.env.SF_SWARM_NO_OUTPUT_TIMEOUT_MS; + origHeadlessEnv = process.env.SF_HEADLESS; delete process.env.SF_AUTONOMOUS_VIA_SWARM; delete process.env.SF_SWARM_NO_OUTPUT_TIMEOUT_MS; + delete process.env.SF_HEADLESS; // Default implementation for the happy-path tests: return a deterministic reply. mockSwarmDispatchAndWait.mockImplementation( @@ -165,6 +168,11 @@ afterEach(() => { } else { process.env.SF_SWARM_NO_OUTPUT_TIMEOUT_MS = origNoOutputEnv; } + if (origHeadlessEnv === undefined) { + delete process.env.SF_HEADLESS; + } else { + process.env.SF_HEADLESS = origHeadlessEnv; + } }); // ─── Flag ON — happy path ───────────────────────────────────────────────────── @@ -694,6 +702,31 @@ describe("runUnit — SF_AUTONOMOUS_VIA_SWARM unset — default path", () => { expect(mockSwarmDispatchAndWait).not.toHaveBeenCalled(); expect(result.status).toBe("cancelled"); }); + + test("does NOT route checkpoint-only solver pass through swarm in headless mode", async () => { + process.env.SF_HEADLESS = "1"; + process.env.SF_AUTONOMOUS_VIA_SWARM = "1"; + + const ctx = makeCtx("/tmp"); + const pi = makePi(); + const s = makeS("/tmp"); + s.cmdCtx.newSession = vi.fn(async () => { + throw new Error("session-failed"); + }); + + const result = await runUnit( + ctx, + pi, + s, + "execute-task", + "u-solver-pass", + "emit checkpoint only", + { activeToolsAllowlist: ["checkpoint"] }, + ); + + expect(mockSwarmDispatchAndWait).not.toHaveBeenCalled(); + expect(result.status).toBe("cancelled"); + }); }); // ─── workMode derivation unit tests ──────────────────────────────────────────