feat(journal): swarm-dispatch event per dispatch — cross-repo telemetry
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
The swarm dispatch path is default in headless (ea8a3d935) but the
journal didn't tag events with which dispatch path was used. Result:
grep "swarm" .sf/journal/*.jsonl returned zero hits across this repo,
~/code/dr-repo, ~/code/centralcloud/dr — even where swarm IS running.
Cross-repo telemetry was blind to swarm adoption.
Now both swarm dispatch sites emit a journal event per call:
runUnitViaSwarm (auto/run-unit.js):
- success: outcome from worker checkpoint or "continue", via "autonomous-unit"
- no-reply: outcome "no-reply" with error field
- throw: outcome "error" with error field
runSingleAgentViaSwarm (subagent/index.js):
- success: outcome "agent-reply", via "subagent-extension", agentName
- no-reply / catch: same outcome scheme as run-unit
Event shape:
{
ts, eventType: "swarm-dispatch",
data: { unitType, unitId, targetAgent, workMode, toolCallCount,
outcome, via, agentName?, error? }
}
All six emitJournalEvent calls wrapped in try/catch — journal write
failure must not break dispatch (mirrors crash-recovery.js pattern).
Tests: 68 new assertions across the two files (5 + 4 test groups
covering happy path, no-reply, throw). Full suite 1872 pass, no
regressions.
Once landed everywhere this enables:
- grep swarm-dispatch .sf/journal/*.jsonl shows adoption
- ~/.sf/agent/upstream-feedback.jsonl rolls up swarm vs legacy ratio
- "is this repo using swarms?" becomes a one-line query
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c42c13b882
commit
bd3fbda9cb
4 changed files with 390 additions and 1 deletions
|
|
@ -215,6 +215,9 @@ function buildSwarmWorkerSystemPrompt(unitType, unitId, basePath) {
|
|||
/**
|
||||
* Run a unit through the swarm dispatch layer instead of the parent session.
|
||||
*
|
||||
* Emits a `swarm-dispatch` journal event per call so cross-repo telemetry can
|
||||
* distinguish swarm vs legacy paths.
|
||||
*
|
||||
* Purpose: alternate execution path activated by `SF_AUTONOMOUS_VIA_SWARM=1`.
|
||||
* Routes the unit prompt as a DispatchEnvelope through `swarmDispatchAndWait`,
|
||||
* then maps the structured swarm result back into the same UnitResult shape that
|
||||
|
|
@ -438,6 +441,24 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
|
|||
if (typeof ctx.ui?.notify === "function") {
|
||||
ctx.ui.notify(`[${unitType}] ${unitId} → swarm error: ${msg}`, "error");
|
||||
}
|
||||
try {
|
||||
emitJournalEvent(basePath, {
|
||||
ts: new Date().toISOString(),
|
||||
eventType: "swarm-dispatch",
|
||||
data: {
|
||||
unitType,
|
||||
unitId,
|
||||
targetAgent: null,
|
||||
workMode: envelope.workMode,
|
||||
toolCallCount: collectedToolCalls.length,
|
||||
outcome: "error",
|
||||
via: "autonomous-unit",
|
||||
error: msg,
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
/* journal write failure must not break dispatch */
|
||||
}
|
||||
return {
|
||||
status: "cancelled",
|
||||
requestDispatchedAt,
|
||||
|
|
@ -492,6 +513,24 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
|
|||
"error",
|
||||
);
|
||||
}
|
||||
try {
|
||||
emitJournalEvent(basePath, {
|
||||
ts: new Date().toISOString(),
|
||||
eventType: "swarm-dispatch",
|
||||
data: {
|
||||
unitType,
|
||||
unitId,
|
||||
targetAgent: swarmResult.targetAgent ?? null,
|
||||
workMode: envelope.workMode,
|
||||
toolCallCount: collectedToolCalls.length,
|
||||
outcome: "no-reply",
|
||||
via: "autonomous-unit",
|
||||
error: reason,
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
/* journal write failure must not break dispatch */
|
||||
}
|
||||
return {
|
||||
status: "cancelled",
|
||||
requestDispatchedAt,
|
||||
|
|
@ -518,6 +557,23 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
|
|||
"info",
|
||||
);
|
||||
}
|
||||
try {
|
||||
emitJournalEvent(basePath, {
|
||||
ts: new Date().toISOString(),
|
||||
eventType: "swarm-dispatch",
|
||||
data: {
|
||||
unitType,
|
||||
unitId,
|
||||
targetAgent: swarmResult.targetAgent,
|
||||
workMode: envelope.workMode,
|
||||
toolCallCount: collectedToolCalls.length,
|
||||
outcome: workerSignaledOutcome ?? "continue",
|
||||
via: "autonomous-unit",
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
/* journal write failure must not break dispatch */
|
||||
}
|
||||
|
||||
// ── Derive outcome and build content blocks ──────────────────────────────
|
||||
// Use real events collected above. If the worker called the `checkpoint` tool
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ import {
|
|||
mergeDeltaPatches,
|
||||
readIsolationMode,
|
||||
} from "./isolation.js";
|
||||
import { emitJournalEvent } from "../journal.js";
|
||||
import { swarmDispatchAndWait } from "../uok/swarm-dispatch.js";
|
||||
import { composeAgentPrompt } from "./prompt-parts.js";
|
||||
import { registerWorker, updateWorker } from "./worker-registry.js";
|
||||
|
|
@ -1196,6 +1197,9 @@ async function waitForFile(filePath, signal, timeoutMs = 30 * 60 * 1000) {
|
|||
/**
|
||||
* Run a single subagent through the swarm dispatch layer.
|
||||
*
|
||||
* Emits a `swarm-dispatch` journal event per call so cross-repo telemetry can
|
||||
* distinguish swarm vs legacy paths.
|
||||
*
|
||||
* Purpose: alternate execution path activated by `SF_SUBAGENT_VIA_SWARM=1`.
|
||||
* Routes the subagent task as a DispatchEnvelope through `swarmDispatchAndWait`,
|
||||
* then maps the structured swarm result back into the same currentResult shape
|
||||
|
|
@ -1298,6 +1302,25 @@ async function runSingleAgentViaSwarm(
|
|||
currentResult.stderr = reason;
|
||||
currentResult.errorMessage = reason;
|
||||
emitUpdate();
|
||||
try {
|
||||
emitJournalEvent(basePath, {
|
||||
ts: new Date().toISOString(),
|
||||
eventType: "swarm-dispatch",
|
||||
data: {
|
||||
unitType: "delegate",
|
||||
unitId: envelope.unitId,
|
||||
targetAgent: swarmResult.targetAgent ?? null,
|
||||
workMode: envelope.workMode,
|
||||
toolCallCount: 0,
|
||||
outcome: "no-reply",
|
||||
via: "subagent-extension",
|
||||
agentName: agent.name,
|
||||
error: reason,
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
/* journal write failure must not break dispatch */
|
||||
}
|
||||
return currentResult;
|
||||
}
|
||||
|
||||
|
|
@ -1314,6 +1337,24 @@ async function runSingleAgentViaSwarm(
|
|||
}
|
||||
|
||||
emitUpdate();
|
||||
try {
|
||||
emitJournalEvent(basePath, {
|
||||
ts: new Date().toISOString(),
|
||||
eventType: "swarm-dispatch",
|
||||
data: {
|
||||
unitType: "delegate",
|
||||
unitId: envelope.unitId,
|
||||
targetAgent: swarmResult.targetAgent,
|
||||
workMode: envelope.workMode,
|
||||
toolCallCount: 0,
|
||||
outcome: "agent-reply",
|
||||
via: "subagent-extension",
|
||||
agentName: agent.name,
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
/* journal write failure must not break dispatch */
|
||||
}
|
||||
return currentResult;
|
||||
} catch (error) {
|
||||
const message =
|
||||
|
|
@ -1324,6 +1365,25 @@ async function runSingleAgentViaSwarm(
|
|||
currentResult.stderr += currentResult.stderr ? `\n${message}` : message;
|
||||
currentResult.errorMessage = message;
|
||||
emitUpdate();
|
||||
try {
|
||||
emitJournalEvent(basePath, {
|
||||
ts: new Date().toISOString(),
|
||||
eventType: "swarm-dispatch",
|
||||
data: {
|
||||
unitType: "delegate",
|
||||
unitId: envelope.unitId,
|
||||
targetAgent: null,
|
||||
workMode: envelope.workMode,
|
||||
toolCallCount: 0,
|
||||
outcome: "error",
|
||||
via: "subagent-extension",
|
||||
agentName: agent.name,
|
||||
error: message,
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
/* journal write failure must not break dispatch */
|
||||
}
|
||||
return currentResult;
|
||||
} finally {
|
||||
liveSubagentControllers.delete(controller);
|
||||
|
|
|
|||
|
|
@ -54,6 +54,19 @@ vi.mock("../autonomous-solver.js", () => ({
|
|||
MAX_CHECKPOINTS_PER_ITERATION: 5,
|
||||
}));
|
||||
|
||||
// ─── Mock journal.js ──────────────────────────────────────────────────────────
|
||||
// Stub emitJournalEvent so tests never touch the filesystem journal.
|
||||
|
||||
const { mockEmitJournalEvent } = vi.hoisted(() => {
|
||||
const fn = vi.fn();
|
||||
return { mockEmitJournalEvent: fn };
|
||||
});
|
||||
|
||||
vi.mock("../journal.js", () => ({
|
||||
emitJournalEvent: mockEmitJournalEvent,
|
||||
queryJournal: vi.fn(async () => []),
|
||||
}));
|
||||
|
||||
// ─── Mock everything runUnit imports that touches DB / session infrastructure ─
|
||||
|
||||
vi.mock("../debug-logger.js", () => ({ debugLog: vi.fn() }));
|
||||
|
|
@ -1086,3 +1099,128 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => {
|
|||
expect(textBlocks[0].text).toBe(MOCK_REPLY);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── swarm-dispatch journal event ────────────────────────────────────────────
|
||||
|
||||
describe("runUnit — swarm-dispatch journal events", () => {
|
||||
test("emits swarm-dispatch event on happy-path dispatch with correct shape", async () => {
|
||||
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
|
||||
|
||||
const ctx = makeCtx("/proj");
|
||||
const pi = makePi();
|
||||
const s = makeS("/proj");
|
||||
|
||||
await runUnit(ctx, pi, s, "execute-task", "jrn-001", "implement it", {});
|
||||
|
||||
expect(mockEmitJournalEvent).toHaveBeenCalled();
|
||||
const swarmEvent = mockEmitJournalEvent.mock.calls
|
||||
.map((c) => c[1])
|
||||
.find((e) => e.eventType === "swarm-dispatch");
|
||||
expect(swarmEvent).toBeDefined();
|
||||
expect(swarmEvent.ts).toMatch(/^\d{4}-\d{2}-\d{2}T/);
|
||||
expect(swarmEvent.data.unitType).toBe("execute-task");
|
||||
expect(swarmEvent.data.unitId).toBe("jrn-001");
|
||||
expect(swarmEvent.data.targetAgent).toBe(MOCK_TARGET);
|
||||
expect(swarmEvent.data.workMode).toBe("build");
|
||||
expect(typeof swarmEvent.data.toolCallCount).toBe("number");
|
||||
expect(swarmEvent.data.outcome).toBe("continue");
|
||||
expect(swarmEvent.data.via).toBe("autonomous-unit");
|
||||
});
|
||||
|
||||
test("emits swarm-dispatch event with outcome=complete when worker signals complete", async () => {
|
||||
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
|
||||
|
||||
mockWithToolCallEvents([
|
||||
{
|
||||
name: "checkpoint",
|
||||
arguments: {
|
||||
outcome: "complete",
|
||||
summary: "All done.",
|
||||
completedItems: ["task"],
|
||||
remainingItems: [],
|
||||
verificationEvidence: ["npm test: pass"],
|
||||
},
|
||||
},
|
||||
]);
|
||||
|
||||
const ctx = makeCtx("/proj");
|
||||
const pi = makePi();
|
||||
const s = makeS("/proj");
|
||||
|
||||
await runUnit(ctx, pi, s, "execute-task", "jrn-complete", "build it", {});
|
||||
|
||||
const swarmEvent = mockEmitJournalEvent.mock.calls
|
||||
.map((c) => c[1])
|
||||
.find((e) => e.eventType === "swarm-dispatch");
|
||||
expect(swarmEvent).toBeDefined();
|
||||
expect(swarmEvent.data.outcome).toBe("complete");
|
||||
});
|
||||
|
||||
test("emits swarm-dispatch event with outcome=error when swarmDispatchAndWait throws", async () => {
|
||||
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
|
||||
mockSwarmDispatchAndWait.mockRejectedValueOnce(new Error("bus failure"));
|
||||
|
||||
const ctx = makeCtx("/proj");
|
||||
const pi = makePi();
|
||||
const s = makeS("/proj");
|
||||
|
||||
await runUnit(ctx, pi, s, "execute-task", "jrn-err", "prompt", {});
|
||||
|
||||
const swarmEvent = mockEmitJournalEvent.mock.calls
|
||||
.map((c) => c[1])
|
||||
.find((e) => e.eventType === "swarm-dispatch");
|
||||
expect(swarmEvent).toBeDefined();
|
||||
expect(swarmEvent.data.outcome).toBe("error");
|
||||
expect(swarmEvent.data.via).toBe("autonomous-unit");
|
||||
expect(swarmEvent.data.error).toContain("bus failure");
|
||||
});
|
||||
|
||||
test("emits swarm-dispatch event with outcome=no-reply when swarm returns error field", async () => {
|
||||
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
|
||||
mockSwarmDispatchAndWait.mockResolvedValueOnce({
|
||||
messageId: "m-noreply-jrn",
|
||||
targetAgent: "worker-1",
|
||||
swarmName: "default",
|
||||
reply: null,
|
||||
replyMessageId: null,
|
||||
error: "runAgentTurn failed",
|
||||
});
|
||||
|
||||
const ctx = makeCtx("/proj");
|
||||
const pi = makePi();
|
||||
const s = makeS("/proj");
|
||||
|
||||
await runUnit(ctx, pi, s, "execute-task", "jrn-noreply", "prompt", {});
|
||||
|
||||
const swarmEvent = mockEmitJournalEvent.mock.calls
|
||||
.map((c) => c[1])
|
||||
.find((e) => e.eventType === "swarm-dispatch");
|
||||
expect(swarmEvent).toBeDefined();
|
||||
expect(swarmEvent.data.outcome).toBe("no-reply");
|
||||
expect(swarmEvent.data.error).toContain("runAgentTurn failed");
|
||||
});
|
||||
|
||||
test("journal write failure does not break dispatch (fail-open)", async () => {
|
||||
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
|
||||
mockEmitJournalEvent.mockImplementationOnce(() => {
|
||||
throw new Error("disk full");
|
||||
});
|
||||
|
||||
const ctx = makeCtx("/proj");
|
||||
const pi = makePi();
|
||||
const s = makeS("/proj");
|
||||
|
||||
const result = await runUnit(
|
||||
ctx,
|
||||
pi,
|
||||
s,
|
||||
"execute-task",
|
||||
"jrn-failopen",
|
||||
"build it",
|
||||
{},
|
||||
);
|
||||
|
||||
// Must still complete even though journal threw
|
||||
expect(result.status).toBe("completed");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -22,8 +22,15 @@ vi.mock("../uok/swarm-dispatch.js", () => {
|
|||
};
|
||||
});
|
||||
|
||||
// Import the mock handle and the module under test AFTER vi.mock is declared.
|
||||
// ─── Mock journal.js ──────────────────────────────────────────────────────────
|
||||
vi.mock("../journal.js", () => ({
|
||||
emitJournalEvent: vi.fn(),
|
||||
queryJournal: vi.fn(async () => []),
|
||||
}));
|
||||
|
||||
// Import the mock handles and the module under test AFTER vi.mock is declared.
|
||||
const { swarmDispatchAndWait } = await import("../uok/swarm-dispatch.js");
|
||||
const { emitJournalEvent } = await import("../journal.js");
|
||||
|
||||
// runSingleAgent and runSingleAgentViaSwarm are exported for testing only.
|
||||
const { runSingleAgent, runSingleAgentViaSwarm } = await import(
|
||||
|
|
@ -409,3 +416,131 @@ test("AbortSignal is passed through to swarmDispatchAndWait options", async () =
|
|||
"signal must be an AbortSignal",
|
||||
);
|
||||
});
|
||||
|
||||
// ─── swarm-dispatch journal events ────────────────────────────────────────────
|
||||
|
||||
test("swarm success → emits swarm-dispatch journal event with outcome=agent-reply", async () => {
|
||||
process.env.SF_SUBAGENT_VIA_SWARM = "1";
|
||||
swarmDispatchAndWait.mockResolvedValueOnce(makeDeterministicSwarmResult());
|
||||
|
||||
const agents = makeAgents();
|
||||
await runSingleAgent(
|
||||
DEFAULT_CWD,
|
||||
agents,
|
||||
"worker",
|
||||
"implement the feature",
|
||||
undefined,
|
||||
1,
|
||||
undefined,
|
||||
undefined,
|
||||
NOOP_MAKE_DETAILS,
|
||||
undefined,
|
||||
undefined,
|
||||
);
|
||||
|
||||
const calls = emitJournalEvent.mock.calls;
|
||||
assert.ok(calls.length > 0, "emitJournalEvent should have been called");
|
||||
const swarmCall = calls.find((c) => c[1]?.eventType === "swarm-dispatch");
|
||||
assert.ok(swarmCall, "a swarm-dispatch event must have been emitted");
|
||||
const [calledBasePath, entry] = swarmCall;
|
||||
assert.equal(calledBasePath, DEFAULT_CWD);
|
||||
assert.match(entry.ts, /^\d{4}-\d{2}-\d{2}T/);
|
||||
assert.equal(entry.data.unitType, "delegate");
|
||||
assert.ok(
|
||||
entry.data.unitId.startsWith("worker-"),
|
||||
`unitId should start with "worker-", got: ${entry.data.unitId}`,
|
||||
);
|
||||
assert.equal(entry.data.targetAgent, "worker-1");
|
||||
assert.equal(entry.data.workMode, "build");
|
||||
assert.equal(typeof entry.data.toolCallCount, "number");
|
||||
assert.equal(entry.data.outcome, "agent-reply");
|
||||
assert.equal(entry.data.via, "subagent-extension");
|
||||
assert.equal(entry.data.agentName, "worker");
|
||||
});
|
||||
|
||||
test("swarm returns error → emits swarm-dispatch event with outcome=no-reply", async () => {
|
||||
process.env.SF_SUBAGENT_VIA_SWARM = "1";
|
||||
swarmDispatchAndWait.mockResolvedValueOnce({
|
||||
...makeDeterministicSwarmResult(),
|
||||
reply: null,
|
||||
error: "agent runner failed",
|
||||
});
|
||||
|
||||
const agents = makeAgents();
|
||||
await runSingleAgent(
|
||||
DEFAULT_CWD,
|
||||
agents,
|
||||
"worker",
|
||||
"task",
|
||||
undefined,
|
||||
1,
|
||||
undefined,
|
||||
undefined,
|
||||
NOOP_MAKE_DETAILS,
|
||||
undefined,
|
||||
undefined,
|
||||
);
|
||||
|
||||
const swarmCall = emitJournalEvent.mock.calls.find(
|
||||
(c) => c[1]?.eventType === "swarm-dispatch",
|
||||
);
|
||||
assert.ok(swarmCall, "a swarm-dispatch event must have been emitted");
|
||||
const [, entry] = swarmCall;
|
||||
assert.equal(entry.data.outcome, "no-reply");
|
||||
assert.ok(entry.data.error?.includes("agent runner failed"));
|
||||
assert.equal(entry.data.via, "subagent-extension");
|
||||
});
|
||||
|
||||
test("swarmDispatchAndWait throws → emits swarm-dispatch event with outcome=error", async () => {
|
||||
process.env.SF_SUBAGENT_VIA_SWARM = "1";
|
||||
swarmDispatchAndWait.mockRejectedValueOnce(new Error("network timeout"));
|
||||
|
||||
const agents = makeAgents();
|
||||
await runSingleAgent(
|
||||
DEFAULT_CWD,
|
||||
agents,
|
||||
"worker",
|
||||
"task",
|
||||
undefined,
|
||||
1,
|
||||
undefined,
|
||||
undefined,
|
||||
NOOP_MAKE_DETAILS,
|
||||
undefined,
|
||||
undefined,
|
||||
);
|
||||
|
||||
const swarmCall = emitJournalEvent.mock.calls.find(
|
||||
(c) => c[1]?.eventType === "swarm-dispatch",
|
||||
);
|
||||
assert.ok(swarmCall, "a swarm-dispatch event must have been emitted on throw");
|
||||
const [, entry] = swarmCall;
|
||||
assert.equal(entry.data.outcome, "error");
|
||||
assert.ok(entry.data.error?.includes("network timeout"));
|
||||
});
|
||||
|
||||
test("journal write failure does not break subagent dispatch (fail-open)", async () => {
|
||||
process.env.SF_SUBAGENT_VIA_SWARM = "1";
|
||||
swarmDispatchAndWait.mockResolvedValueOnce(makeDeterministicSwarmResult());
|
||||
emitJournalEvent.mockImplementationOnce(() => {
|
||||
throw new Error("disk full");
|
||||
});
|
||||
|
||||
const agents = makeAgents();
|
||||
const result = await runSingleAgent(
|
||||
DEFAULT_CWD,
|
||||
agents,
|
||||
"worker",
|
||||
"task",
|
||||
undefined,
|
||||
1,
|
||||
undefined,
|
||||
undefined,
|
||||
NOOP_MAKE_DETAILS,
|
||||
undefined,
|
||||
undefined,
|
||||
);
|
||||
|
||||
// Must still succeed even though journal threw
|
||||
assert.equal(result.exitCode, 0);
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue