fix(auto): avoid duplicate swarm checkpoints

2026-05-15 11:01:08 +02:00 · 2026-05-15 11:01:08 +02:00 · 5e478d6506
commit 5e478d6506
parent 7a4a62e244
2 changed files with 48 additions and 49 deletions
--- a/src/resources/extensions/sf/auto/run-unit.js
+++ b/src/resources/extensions/sf/auto/run-unit.js
@@ -619,45 +619,53 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
 		];
 	}

-	try {
-		appendAutonomousSolverCheckpoint(basePath, {
+	if (!hasCheckpointCall) {
+		try {
+			appendAutonomousSolverCheckpoint(basePath, {
+				unitType,
+				unitId,
+				outcome,
+				summary: summary || "Swarm agent completed unit turn.",
+				completedItems,
+				remainingItems,
+				verificationEvidence,
+				pdd: {
+					purpose:
+						"Synthetic checkpoint from swarm agent reply when the worker did not call the checkpoint tool.",
+					consumer: "phases-unit.js assessAutonomousSolverTurn",
+					contract:
+						"Falls back to 'continue' so the loop re-evaluates rather than incorrectly completing.",
+					failureBoundary:
+						"appendAutonomousSolverCheckpoint failure is swallowed — the loop will repair via its own missing-checkpoint retry path.",
+					evidence: `swarm-agent ${swarmResult.targetAgent} replied with ${replyText.length} chars; workerSignaledOutcome=${workerSignaledOutcome ?? "null"}; collectedToolCalls=${collectedToolCalls.length}`,
+					nonGoals: "Does not synthesize completion.",
+					invariants:
+						"Synthetic checkpoints are only written when the worker emitted no real checkpoint call.",
+					assumptions:
+						"The swarm agent processed the unit prompt and returned a non-empty reply.",
+				},
+			});
+			debugLog("runUnit[swarm]", {
+				phase: "synthesized-checkpoint",
+				unitType,
+				unitId,
+				outcome,
+			});
+		} catch (cpErr) {
+			// Fail-open: if checkpoint synthesis fails, the repair loop will handle it.
+			debugLog("runUnit[swarm]", {
+				phase: "synthesized-checkpoint-error",
+				unitType,
+				unitId,
+				error: getErrorMessage(cpErr),
+			});
+		}
+	} else {
+		debugLog("runUnit[swarm]", {
+			phase: "real-checkpoint-observed",
 			unitType,
 			unitId,
 			outcome,
-			summary: summary || "Swarm agent completed unit turn.",
-			completedItems,
-			remainingItems,
-			verificationEvidence,
-			pdd: {
-				purpose:
-					"Checkpoint from swarm agent reply — real outcome when worker called checkpoint tool, conservative fallback otherwise.",
-				consumer: "phases-unit.js assessAutonomousSolverTurn",
-				contract:
-					"outcome reflects the worker's checkpoint call when available; falls back to 'continue' so the loop re-evaluates rather than incorrectly completing.",
-				failureBoundary:
-					"appendAutonomousSolverCheckpoint failure is swallowed — the loop will repair via its own missing-checkpoint retry path.",
-				evidence: `swarm-agent ${swarmResult.targetAgent} replied with ${replyText.length} chars; workerSignaledOutcome=${workerSignaledOutcome ?? "null"}; collectedToolCalls=${collectedToolCalls.length}`,
-				nonGoals:
-					"Does not synthesize fake tool calls — uses real ones when available.",
-				invariants:
-					"Never claims outcome=complete unless the worker explicitly called checkpoint with outcome='complete'.",
-				assumptions:
-					"The swarm agent processed the unit prompt and returned a non-empty reply.",
-			},
-		});
-		debugLog("runUnit[swarm]", {
-			phase: "synthesized-checkpoint",
-			unitType,
-			unitId,
-			outcome,
-		});
-	} catch (cpErr) {
-		// Fail-open: if checkpoint synthesis fails, the repair loop will handle it.
-		debugLog("runUnit[swarm]", {
-			phase: "synthesized-checkpoint-error",
-			unitType,
-			unitId,
-			error: getErrorMessage(cpErr),
 		});
 	}

--- a/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs
+++ b/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs
@@ -985,10 +985,10 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => {
 		expect(result.swarmToolCallCount).toBe(0);
 	});

-	test("checkpoint tool call with outcome=complete → appendCheckpoint called with outcome=complete", async () => {
+	test("checkpoint tool call with outcome=complete relies on the real checkpoint write", async () => {
 		// The canonical completion detection: when the worker calls checkpoint with
-		// outcome='complete', runUnitViaSwarm should pass outcome='complete' to
-		// appendAutonomousSolverCheckpoint (not hardcode 'continue').
+		// outcome='complete', runUnitViaSwarm must not append a duplicate parent
+		// checkpoint. The tool execution already updated solver state.
 		process.env.SF_AUTONOMOUS_VIA_SWARM = "1";

 		mockWithToolCallEvents([
@@ -1030,16 +1030,7 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => {

 		expect(result.status).toBe("completed");

-		// appendAutonomousSolverCheckpoint must have been called with outcome='complete'
-		expect(mockAppendCheckpoint).toHaveBeenCalledOnce();
-		const [, params] = mockAppendCheckpoint.mock.calls[0];
-		expect(params.outcome).toBe("complete");
-		expect(params.completedItems).toEqual([
-			"feature implemented",
-			"tests passing",
-		]);
-		expect(params.remainingItems).toEqual([]);
-		expect(params.verificationEvidence).toEqual(["npm test: all green"]);
+		expect(mockAppendCheckpoint).not.toHaveBeenCalled();

 		// The real checkpoint tool_use block must appear in event.messages[last].content
 		const lastMsg = result.event.messages[result.event.messages.length - 1];