feat(journal): swarm-dispatch event per dispatch — cross-repo telemetry
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions

The swarm dispatch path is default in headless (ea8a3d935) but the
journal didn't tag events with which dispatch path was used. Result:
grep "swarm" .sf/journal/*.jsonl returned zero hits across this repo,
~/code/dr-repo, ~/code/centralcloud/dr — even where swarm IS running.
Cross-repo telemetry was blind to swarm adoption.

Now both swarm dispatch sites emit a journal event per call:

runUnitViaSwarm (auto/run-unit.js):
- success: outcome from worker checkpoint or "continue", via "autonomous-unit"
- no-reply: outcome "no-reply" with error field
- throw:   outcome "error" with error field

runSingleAgentViaSwarm (subagent/index.js):
- success: outcome "agent-reply", via "subagent-extension", agentName
- no-reply / catch: same outcome scheme as run-unit

Event shape:
{
  ts, eventType: "swarm-dispatch",
  data: { unitType, unitId, targetAgent, workMode, toolCallCount,
          outcome, via, agentName?, error? }
}

All six emitJournalEvent calls wrapped in try/catch — journal write
failure must not break dispatch (mirrors crash-recovery.js pattern).

Tests: 68 new assertions across the two files (5 + 4 test groups
covering happy path, no-reply, throw). Full suite 1872 pass, no
regressions.

Once landed everywhere this enables:
- grep swarm-dispatch .sf/journal/*.jsonl shows adoption
- ~/.sf/agent/upstream-feedback.jsonl rolls up swarm vs legacy ratio
- "is this repo using swarms?" becomes a one-line query

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-15 13:22:28 +02:00
parent c42c13b882
commit bd3fbda9cb
4 changed files with 390 additions and 1 deletions

View file

@ -215,6 +215,9 @@ function buildSwarmWorkerSystemPrompt(unitType, unitId, basePath) {
/** /**
* Run a unit through the swarm dispatch layer instead of the parent session. * Run a unit through the swarm dispatch layer instead of the parent session.
* *
* Emits a `swarm-dispatch` journal event per call so cross-repo telemetry can
* distinguish swarm vs legacy paths.
*
* Purpose: alternate execution path activated by `SF_AUTONOMOUS_VIA_SWARM=1`. * Purpose: alternate execution path activated by `SF_AUTONOMOUS_VIA_SWARM=1`.
* Routes the unit prompt as a DispatchEnvelope through `swarmDispatchAndWait`, * Routes the unit prompt as a DispatchEnvelope through `swarmDispatchAndWait`,
* then maps the structured swarm result back into the same UnitResult shape that * then maps the structured swarm result back into the same UnitResult shape that
@ -438,6 +441,24 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
if (typeof ctx.ui?.notify === "function") { if (typeof ctx.ui?.notify === "function") {
ctx.ui.notify(`[${unitType}] ${unitId} → swarm error: ${msg}`, "error"); ctx.ui.notify(`[${unitType}] ${unitId} → swarm error: ${msg}`, "error");
} }
try {
emitJournalEvent(basePath, {
ts: new Date().toISOString(),
eventType: "swarm-dispatch",
data: {
unitType,
unitId,
targetAgent: null,
workMode: envelope.workMode,
toolCallCount: collectedToolCalls.length,
outcome: "error",
via: "autonomous-unit",
error: msg,
},
});
} catch {
/* journal write failure must not break dispatch */
}
return { return {
status: "cancelled", status: "cancelled",
requestDispatchedAt, requestDispatchedAt,
@ -492,6 +513,24 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
"error", "error",
); );
} }
try {
emitJournalEvent(basePath, {
ts: new Date().toISOString(),
eventType: "swarm-dispatch",
data: {
unitType,
unitId,
targetAgent: swarmResult.targetAgent ?? null,
workMode: envelope.workMode,
toolCallCount: collectedToolCalls.length,
outcome: "no-reply",
via: "autonomous-unit",
error: reason,
},
});
} catch {
/* journal write failure must not break dispatch */
}
return { return {
status: "cancelled", status: "cancelled",
requestDispatchedAt, requestDispatchedAt,
@ -518,6 +557,23 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
"info", "info",
); );
} }
try {
emitJournalEvent(basePath, {
ts: new Date().toISOString(),
eventType: "swarm-dispatch",
data: {
unitType,
unitId,
targetAgent: swarmResult.targetAgent,
workMode: envelope.workMode,
toolCallCount: collectedToolCalls.length,
outcome: workerSignaledOutcome ?? "continue",
via: "autonomous-unit",
},
});
} catch {
/* journal write failure must not break dispatch */
}
// ── Derive outcome and build content blocks ────────────────────────────── // ── Derive outcome and build content blocks ──────────────────────────────
// Use real events collected above. If the worker called the `checkpoint` tool // Use real events collected above. If the worker called the `checkpoint` tool

View file

@ -47,6 +47,7 @@ import {
mergeDeltaPatches, mergeDeltaPatches,
readIsolationMode, readIsolationMode,
} from "./isolation.js"; } from "./isolation.js";
import { emitJournalEvent } from "../journal.js";
import { swarmDispatchAndWait } from "../uok/swarm-dispatch.js"; import { swarmDispatchAndWait } from "../uok/swarm-dispatch.js";
import { composeAgentPrompt } from "./prompt-parts.js"; import { composeAgentPrompt } from "./prompt-parts.js";
import { registerWorker, updateWorker } from "./worker-registry.js"; import { registerWorker, updateWorker } from "./worker-registry.js";
@ -1196,6 +1197,9 @@ async function waitForFile(filePath, signal, timeoutMs = 30 * 60 * 1000) {
/** /**
* Run a single subagent through the swarm dispatch layer. * Run a single subagent through the swarm dispatch layer.
* *
* Emits a `swarm-dispatch` journal event per call so cross-repo telemetry can
* distinguish swarm vs legacy paths.
*
* Purpose: alternate execution path activated by `SF_SUBAGENT_VIA_SWARM=1`. * Purpose: alternate execution path activated by `SF_SUBAGENT_VIA_SWARM=1`.
* Routes the subagent task as a DispatchEnvelope through `swarmDispatchAndWait`, * Routes the subagent task as a DispatchEnvelope through `swarmDispatchAndWait`,
* then maps the structured swarm result back into the same currentResult shape * then maps the structured swarm result back into the same currentResult shape
@ -1298,6 +1302,25 @@ async function runSingleAgentViaSwarm(
currentResult.stderr = reason; currentResult.stderr = reason;
currentResult.errorMessage = reason; currentResult.errorMessage = reason;
emitUpdate(); emitUpdate();
try {
emitJournalEvent(basePath, {
ts: new Date().toISOString(),
eventType: "swarm-dispatch",
data: {
unitType: "delegate",
unitId: envelope.unitId,
targetAgent: swarmResult.targetAgent ?? null,
workMode: envelope.workMode,
toolCallCount: 0,
outcome: "no-reply",
via: "subagent-extension",
agentName: agent.name,
error: reason,
},
});
} catch {
/* journal write failure must not break dispatch */
}
return currentResult; return currentResult;
} }
@ -1314,6 +1337,24 @@ async function runSingleAgentViaSwarm(
} }
emitUpdate(); emitUpdate();
try {
emitJournalEvent(basePath, {
ts: new Date().toISOString(),
eventType: "swarm-dispatch",
data: {
unitType: "delegate",
unitId: envelope.unitId,
targetAgent: swarmResult.targetAgent,
workMode: envelope.workMode,
toolCallCount: 0,
outcome: "agent-reply",
via: "subagent-extension",
agentName: agent.name,
},
});
} catch {
/* journal write failure must not break dispatch */
}
return currentResult; return currentResult;
} catch (error) { } catch (error) {
const message = const message =
@ -1324,6 +1365,25 @@ async function runSingleAgentViaSwarm(
currentResult.stderr += currentResult.stderr ? `\n${message}` : message; currentResult.stderr += currentResult.stderr ? `\n${message}` : message;
currentResult.errorMessage = message; currentResult.errorMessage = message;
emitUpdate(); emitUpdate();
try {
emitJournalEvent(basePath, {
ts: new Date().toISOString(),
eventType: "swarm-dispatch",
data: {
unitType: "delegate",
unitId: envelope.unitId,
targetAgent: null,
workMode: envelope.workMode,
toolCallCount: 0,
outcome: "error",
via: "subagent-extension",
agentName: agent.name,
error: message,
},
});
} catch {
/* journal write failure must not break dispatch */
}
return currentResult; return currentResult;
} finally { } finally {
liveSubagentControllers.delete(controller); liveSubagentControllers.delete(controller);

View file

@ -54,6 +54,19 @@ vi.mock("../autonomous-solver.js", () => ({
MAX_CHECKPOINTS_PER_ITERATION: 5, MAX_CHECKPOINTS_PER_ITERATION: 5,
})); }));
// ─── Mock journal.js ──────────────────────────────────────────────────────────
// Stub emitJournalEvent so tests never touch the filesystem journal.
const { mockEmitJournalEvent } = vi.hoisted(() => {
const fn = vi.fn();
return { mockEmitJournalEvent: fn };
});
vi.mock("../journal.js", () => ({
emitJournalEvent: mockEmitJournalEvent,
queryJournal: vi.fn(async () => []),
}));
// ─── Mock everything runUnit imports that touches DB / session infrastructure ─ // ─── Mock everything runUnit imports that touches DB / session infrastructure ─
vi.mock("../debug-logger.js", () => ({ debugLog: vi.fn() })); vi.mock("../debug-logger.js", () => ({ debugLog: vi.fn() }));
@ -1086,3 +1099,128 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => {
expect(textBlocks[0].text).toBe(MOCK_REPLY); expect(textBlocks[0].text).toBe(MOCK_REPLY);
}); });
}); });
// ─── swarm-dispatch journal event ────────────────────────────────────────────
describe("runUnit — swarm-dispatch journal events", () => {
test("emits swarm-dispatch event on happy-path dispatch with correct shape", async () => {
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
const ctx = makeCtx("/proj");
const pi = makePi();
const s = makeS("/proj");
await runUnit(ctx, pi, s, "execute-task", "jrn-001", "implement it", {});
expect(mockEmitJournalEvent).toHaveBeenCalled();
const swarmEvent = mockEmitJournalEvent.mock.calls
.map((c) => c[1])
.find((e) => e.eventType === "swarm-dispatch");
expect(swarmEvent).toBeDefined();
expect(swarmEvent.ts).toMatch(/^\d{4}-\d{2}-\d{2}T/);
expect(swarmEvent.data.unitType).toBe("execute-task");
expect(swarmEvent.data.unitId).toBe("jrn-001");
expect(swarmEvent.data.targetAgent).toBe(MOCK_TARGET);
expect(swarmEvent.data.workMode).toBe("build");
expect(typeof swarmEvent.data.toolCallCount).toBe("number");
expect(swarmEvent.data.outcome).toBe("continue");
expect(swarmEvent.data.via).toBe("autonomous-unit");
});
test("emits swarm-dispatch event with outcome=complete when worker signals complete", async () => {
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
mockWithToolCallEvents([
{
name: "checkpoint",
arguments: {
outcome: "complete",
summary: "All done.",
completedItems: ["task"],
remainingItems: [],
verificationEvidence: ["npm test: pass"],
},
},
]);
const ctx = makeCtx("/proj");
const pi = makePi();
const s = makeS("/proj");
await runUnit(ctx, pi, s, "execute-task", "jrn-complete", "build it", {});
const swarmEvent = mockEmitJournalEvent.mock.calls
.map((c) => c[1])
.find((e) => e.eventType === "swarm-dispatch");
expect(swarmEvent).toBeDefined();
expect(swarmEvent.data.outcome).toBe("complete");
});
test("emits swarm-dispatch event with outcome=error when swarmDispatchAndWait throws", async () => {
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
mockSwarmDispatchAndWait.mockRejectedValueOnce(new Error("bus failure"));
const ctx = makeCtx("/proj");
const pi = makePi();
const s = makeS("/proj");
await runUnit(ctx, pi, s, "execute-task", "jrn-err", "prompt", {});
const swarmEvent = mockEmitJournalEvent.mock.calls
.map((c) => c[1])
.find((e) => e.eventType === "swarm-dispatch");
expect(swarmEvent).toBeDefined();
expect(swarmEvent.data.outcome).toBe("error");
expect(swarmEvent.data.via).toBe("autonomous-unit");
expect(swarmEvent.data.error).toContain("bus failure");
});
test("emits swarm-dispatch event with outcome=no-reply when swarm returns error field", async () => {
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
mockSwarmDispatchAndWait.mockResolvedValueOnce({
messageId: "m-noreply-jrn",
targetAgent: "worker-1",
swarmName: "default",
reply: null,
replyMessageId: null,
error: "runAgentTurn failed",
});
const ctx = makeCtx("/proj");
const pi = makePi();
const s = makeS("/proj");
await runUnit(ctx, pi, s, "execute-task", "jrn-noreply", "prompt", {});
const swarmEvent = mockEmitJournalEvent.mock.calls
.map((c) => c[1])
.find((e) => e.eventType === "swarm-dispatch");
expect(swarmEvent).toBeDefined();
expect(swarmEvent.data.outcome).toBe("no-reply");
expect(swarmEvent.data.error).toContain("runAgentTurn failed");
});
test("journal write failure does not break dispatch (fail-open)", async () => {
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
mockEmitJournalEvent.mockImplementationOnce(() => {
throw new Error("disk full");
});
const ctx = makeCtx("/proj");
const pi = makePi();
const s = makeS("/proj");
const result = await runUnit(
ctx,
pi,
s,
"execute-task",
"jrn-failopen",
"build it",
{},
);
// Must still complete even though journal threw
expect(result.status).toBe("completed");
});
});

View file

@ -22,8 +22,15 @@ vi.mock("../uok/swarm-dispatch.js", () => {
}; };
}); });
// Import the mock handle and the module under test AFTER vi.mock is declared. // ─── Mock journal.js ──────────────────────────────────────────────────────────
vi.mock("../journal.js", () => ({
emitJournalEvent: vi.fn(),
queryJournal: vi.fn(async () => []),
}));
// Import the mock handles and the module under test AFTER vi.mock is declared.
const { swarmDispatchAndWait } = await import("../uok/swarm-dispatch.js"); const { swarmDispatchAndWait } = await import("../uok/swarm-dispatch.js");
const { emitJournalEvent } = await import("../journal.js");
// runSingleAgent and runSingleAgentViaSwarm are exported for testing only. // runSingleAgent and runSingleAgentViaSwarm are exported for testing only.
const { runSingleAgent, runSingleAgentViaSwarm } = await import( const { runSingleAgent, runSingleAgentViaSwarm } = await import(
@ -409,3 +416,131 @@ test("AbortSignal is passed through to swarmDispatchAndWait options", async () =
"signal must be an AbortSignal", "signal must be an AbortSignal",
); );
}); });
// ─── swarm-dispatch journal events ────────────────────────────────────────────
test("swarm success → emits swarm-dispatch journal event with outcome=agent-reply", async () => {
process.env.SF_SUBAGENT_VIA_SWARM = "1";
swarmDispatchAndWait.mockResolvedValueOnce(makeDeterministicSwarmResult());
const agents = makeAgents();
await runSingleAgent(
DEFAULT_CWD,
agents,
"worker",
"implement the feature",
undefined,
1,
undefined,
undefined,
NOOP_MAKE_DETAILS,
undefined,
undefined,
);
const calls = emitJournalEvent.mock.calls;
assert.ok(calls.length > 0, "emitJournalEvent should have been called");
const swarmCall = calls.find((c) => c[1]?.eventType === "swarm-dispatch");
assert.ok(swarmCall, "a swarm-dispatch event must have been emitted");
const [calledBasePath, entry] = swarmCall;
assert.equal(calledBasePath, DEFAULT_CWD);
assert.match(entry.ts, /^\d{4}-\d{2}-\d{2}T/);
assert.equal(entry.data.unitType, "delegate");
assert.ok(
entry.data.unitId.startsWith("worker-"),
`unitId should start with "worker-", got: ${entry.data.unitId}`,
);
assert.equal(entry.data.targetAgent, "worker-1");
assert.equal(entry.data.workMode, "build");
assert.equal(typeof entry.data.toolCallCount, "number");
assert.equal(entry.data.outcome, "agent-reply");
assert.equal(entry.data.via, "subagent-extension");
assert.equal(entry.data.agentName, "worker");
});
test("swarm returns error → emits swarm-dispatch event with outcome=no-reply", async () => {
process.env.SF_SUBAGENT_VIA_SWARM = "1";
swarmDispatchAndWait.mockResolvedValueOnce({
...makeDeterministicSwarmResult(),
reply: null,
error: "agent runner failed",
});
const agents = makeAgents();
await runSingleAgent(
DEFAULT_CWD,
agents,
"worker",
"task",
undefined,
1,
undefined,
undefined,
NOOP_MAKE_DETAILS,
undefined,
undefined,
);
const swarmCall = emitJournalEvent.mock.calls.find(
(c) => c[1]?.eventType === "swarm-dispatch",
);
assert.ok(swarmCall, "a swarm-dispatch event must have been emitted");
const [, entry] = swarmCall;
assert.equal(entry.data.outcome, "no-reply");
assert.ok(entry.data.error?.includes("agent runner failed"));
assert.equal(entry.data.via, "subagent-extension");
});
test("swarmDispatchAndWait throws → emits swarm-dispatch event with outcome=error", async () => {
process.env.SF_SUBAGENT_VIA_SWARM = "1";
swarmDispatchAndWait.mockRejectedValueOnce(new Error("network timeout"));
const agents = makeAgents();
await runSingleAgent(
DEFAULT_CWD,
agents,
"worker",
"task",
undefined,
1,
undefined,
undefined,
NOOP_MAKE_DETAILS,
undefined,
undefined,
);
const swarmCall = emitJournalEvent.mock.calls.find(
(c) => c[1]?.eventType === "swarm-dispatch",
);
assert.ok(swarmCall, "a swarm-dispatch event must have been emitted on throw");
const [, entry] = swarmCall;
assert.equal(entry.data.outcome, "error");
assert.ok(entry.data.error?.includes("network timeout"));
});
test("journal write failure does not break subagent dispatch (fail-open)", async () => {
process.env.SF_SUBAGENT_VIA_SWARM = "1";
swarmDispatchAndWait.mockResolvedValueOnce(makeDeterministicSwarmResult());
emitJournalEvent.mockImplementationOnce(() => {
throw new Error("disk full");
});
const agents = makeAgents();
const result = await runSingleAgent(
DEFAULT_CWD,
agents,
"worker",
"task",
undefined,
1,
undefined,
undefined,
NOOP_MAKE_DETAILS,
undefined,
undefined,
);
// Must still succeed even though journal threw
assert.equal(result.exitCode, 0);
});