fix(auto): avoid duplicate swarm checkpoints

This commit is contained in:
Mikael Hugo 2026-05-15 11:01:08 +02:00
parent 7a4a62e244
commit 5e478d6506
2 changed files with 48 additions and 49 deletions

View file

@@ -619,45 +619,53 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
];
}
try {
appendAutonomousSolverCheckpoint(basePath, {
if (!hasCheckpointCall) {
try {
appendAutonomousSolverCheckpoint(basePath, {
unitType,
unitId,
outcome,
summary: summary || "Swarm agent completed unit turn.",
completedItems,
remainingItems,
verificationEvidence,
pdd: {
purpose:
"Synthetic checkpoint from swarm agent reply when the worker did not call the checkpoint tool.",
consumer: "phases-unit.js assessAutonomousSolverTurn",
contract:
"Falls back to 'continue' so the loop re-evaluates rather than incorrectly completing.",
failureBoundary:
"appendAutonomousSolverCheckpoint failure is swallowed — the loop will repair via its own missing-checkpoint retry path.",
evidence: `swarm-agent ${swarmResult.targetAgent} replied with ${replyText.length} chars; workerSignaledOutcome=${workerSignaledOutcome ?? "null"}; collectedToolCalls=${collectedToolCalls.length}`,
nonGoals: "Does not synthesize completion.",
invariants:
"Synthetic checkpoints are only written when the worker emitted no real checkpoint call.",
assumptions:
"The swarm agent processed the unit prompt and returned a non-empty reply.",
},
});
debugLog("runUnit[swarm]", {
phase: "synthesized-checkpoint",
unitType,
unitId,
outcome,
});
} catch (cpErr) {
// Fail-open: if checkpoint synthesis fails, the repair loop will handle it.
debugLog("runUnit[swarm]", {
phase: "synthesized-checkpoint-error",
unitType,
unitId,
error: getErrorMessage(cpErr),
});
}
} else {
debugLog("runUnit[swarm]", {
phase: "real-checkpoint-observed",
unitType,
unitId,
outcome,
summary: summary || "Swarm agent completed unit turn.",
completedItems,
remainingItems,
verificationEvidence,
pdd: {
purpose:
"Checkpoint from swarm agent reply — real outcome when worker called checkpoint tool, conservative fallback otherwise.",
consumer: "phases-unit.js assessAutonomousSolverTurn",
contract:
"outcome reflects the worker's checkpoint call when available; falls back to 'continue' so the loop re-evaluates rather than incorrectly completing.",
failureBoundary:
"appendAutonomousSolverCheckpoint failure is swallowed — the loop will repair via its own missing-checkpoint retry path.",
evidence: `swarm-agent ${swarmResult.targetAgent} replied with ${replyText.length} chars; workerSignaledOutcome=${workerSignaledOutcome ?? "null"}; collectedToolCalls=${collectedToolCalls.length}`,
nonGoals:
"Does not synthesize fake tool calls — uses real ones when available.",
invariants:
"Never claims outcome=complete unless the worker explicitly called checkpoint with outcome='complete'.",
assumptions:
"The swarm agent processed the unit prompt and returned a non-empty reply.",
},
});
debugLog("runUnit[swarm]", {
phase: "synthesized-checkpoint",
unitType,
unitId,
outcome,
});
} catch (cpErr) {
// Fail-open: if checkpoint synthesis fails, the repair loop will handle it.
debugLog("runUnit[swarm]", {
phase: "synthesized-checkpoint-error",
unitType,
unitId,
error: getErrorMessage(cpErr),
});
}

View file

@@ -985,10 +985,10 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => {
expect(result.swarmToolCallCount).toBe(0);
});
test("checkpoint tool call with outcome=complete → appendCheckpoint called with outcome=complete", async () => {
test("checkpoint tool call with outcome=complete relies on the real checkpoint write", async () => {
// The canonical completion detection: when the worker calls checkpoint with
// outcome='complete', runUnitViaSwarm should pass outcome='complete' to
// appendAutonomousSolverCheckpoint (not hardcode 'continue').
// outcome='complete', runUnitViaSwarm must not append a duplicate parent
// checkpoint. The tool execution already updated solver state.
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
mockWithToolCallEvents([
@@ -1030,16 +1030,7 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => {
expect(result.status).toBe("completed");
// appendAutonomousSolverCheckpoint must have been called with outcome='complete'
expect(mockAppendCheckpoint).toHaveBeenCalledOnce();
const [, params] = mockAppendCheckpoint.mock.calls[0];
expect(params.outcome).toBe("complete");
expect(params.completedItems).toEqual([
"feature implemented",
"tests passing",
]);
expect(params.remainingItems).toEqual([]);
expect(params.verificationEvidence).toEqual(["npm test: all green"]);
expect(mockAppendCheckpoint).not.toHaveBeenCalled();
// The real checkpoint tool_use block must appear in event.messages[last].content
const lastMsg = result.event.messages[result.event.messages.length - 1];