From 5e478d650664b0b9189f1b5b4b9f2942926c0d30 Mon Sep 17 00:00:00 2001
From: Mikael Hugo <mikkihugo@users.noreply.github.com>
Date: Fri, 15 May 2026 11:01:08 +0200
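Subject: [PATCH] fix(auto): avoid duplicate swarm checkpoints

runUnitViaSwarm previously appended a checkpoint after every swarm turn,
including turns where the worker had already called the checkpoint tool.
Append the synthetic checkpoint only when no checkpoint call was observed
(hasCheckpointCall is false); otherwise just emit a
"real-checkpoint-observed" debug event.

Update the swarm test so that a worker checkpoint call with
outcome=complete asserts that no additional checkpoint is appended.
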
---
 src/resources/extensions/sf/auto/run-unit.js  | 80 ++++++++++---------
 .../sf/tests/run-unit-via-swarm.test.mjs      | 17 +---
 2 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/src/resources/extensions/sf/auto/run-unit.js b/src/resources/extensions/sf/auto/run-unit.js
index d91c05973..38bc64c85 100644
--- a/src/resources/extensions/sf/auto/run-unit.js
+++ b/src/resources/extensions/sf/auto/run-unit.js
@@ -619,45 +619,53 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
 		];
 	}
 
-	try {
-		appendAutonomousSolverCheckpoint(basePath, {
+	if (!hasCheckpointCall) {
+		try {
+			appendAutonomousSolverCheckpoint(basePath, {
+				unitType,
+				unitId,
+				outcome,
+				summary: summary || "Swarm agent completed unit turn.",
+				completedItems,
+				remainingItems,
+				verificationEvidence,
+				pdd: {
+					purpose:
+						"Synthetic checkpoint from swarm agent reply when the worker did not call the checkpoint tool.",
+					consumer: "phases-unit.js assessAutonomousSolverTurn",
+					contract:
+						"Falls back to 'continue' so the loop re-evaluates rather than incorrectly completing.",
+					failureBoundary:
+						"appendAutonomousSolverCheckpoint failure is swallowed — the loop will repair via its own missing-checkpoint retry path.",
+					evidence: `swarm-agent ${swarmResult.targetAgent} replied with ${replyText.length} chars; workerSignaledOutcome=${workerSignaledOutcome ?? "null"}; collectedToolCalls=${collectedToolCalls.length}`,
+					nonGoals: "Does not synthesize completion.",
+					invariants:
+						"Synthetic checkpoints are only written when the worker emitted no real checkpoint call.",
+					assumptions:
+						"The swarm agent processed the unit prompt and returned a non-empty reply.",
+				},
+			});
+			debugLog("runUnit[swarm]", {
+				phase: "synthesized-checkpoint",
+				unitType,
+				unitId,
+				outcome,
+			});
+		} catch (cpErr) {
+			// Fail-open: if checkpoint synthesis fails, the repair loop will handle it.
+			debugLog("runUnit[swarm]", {
+				phase: "synthesized-checkpoint-error",
+				unitType,
+				unitId,
+				error: getErrorMessage(cpErr),
+			});
+		}
+	} else {
+		debugLog("runUnit[swarm]", {
+			phase: "real-checkpoint-observed",
 			unitType,
 			unitId,
 			outcome,
-			summary: summary || "Swarm agent completed unit turn.",
-			completedItems,
-			remainingItems,
-			verificationEvidence,
-			pdd: {
-				purpose:
-					"Checkpoint from swarm agent reply — real outcome when worker called checkpoint tool, conservative fallback otherwise.",
-				consumer: "phases-unit.js assessAutonomousSolverTurn",
-				contract:
-					"outcome reflects the worker's checkpoint call when available; falls back to 'continue' so the loop re-evaluates rather than incorrectly completing.",
-				failureBoundary:
-					"appendAutonomousSolverCheckpoint failure is swallowed — the loop will repair via its own missing-checkpoint retry path.",
-				evidence: `swarm-agent ${swarmResult.targetAgent} replied with ${replyText.length} chars; workerSignaledOutcome=${workerSignaledOutcome ?? "null"}; collectedToolCalls=${collectedToolCalls.length}`,
-				nonGoals:
-					"Does not synthesize fake tool calls — uses real ones when available.",
-				invariants:
-					"Never claims outcome=complete unless the worker explicitly called checkpoint with outcome='complete'.",
-				assumptions:
-					"The swarm agent processed the unit prompt and returned a non-empty reply.",
-			},
-		});
-		debugLog("runUnit[swarm]", {
-			phase: "synthesized-checkpoint",
-			unitType,
-			unitId,
-			outcome,
-		});
-	} catch (cpErr) {
-		// Fail-open: if checkpoint synthesis fails, the repair loop will handle it.
-		debugLog("runUnit[swarm]", {
-			phase: "synthesized-checkpoint-error",
-			unitType,
-			unitId,
-			error: getErrorMessage(cpErr),
 		});
 	}
 
diff --git a/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs b/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs
index 52f195f94..c4cc936bb 100644
--- a/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs
+++ b/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs
@@ -985,10 +985,10 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => {
 		expect(result.swarmToolCallCount).toBe(0);
 	});
 
-	test("checkpoint tool call with outcome=complete → appendCheckpoint called with outcome=complete", async () => {
+	test("checkpoint tool call with outcome=complete relies on the real checkpoint write", async () => {
 		// The canonical completion detection: when the worker calls checkpoint with
-		// outcome='complete', runUnitViaSwarm should pass outcome='complete' to
-		// appendAutonomousSolverCheckpoint (not hardcode 'continue').
+		// outcome='complete', runUnitViaSwarm must not append a duplicate parent
+		// checkpoint. The tool execution already updated solver state.
 		process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
 
 		mockWithToolCallEvents([
@@ -1030,16 +1030,7 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => {
 
 		expect(result.status).toBe("completed");
 
-		// appendAutonomousSolverCheckpoint must have been called with outcome='complete'
-		expect(mockAppendCheckpoint).toHaveBeenCalledOnce();
-		const [, params] = mockAppendCheckpoint.mock.calls[0];
-		expect(params.outcome).toBe("complete");
-		expect(params.completedItems).toEqual([
-			"feature implemented",
-			"tests passing",
-		]);
-		expect(params.remainingItems).toEqual([]);
-		expect(params.verificationEvidence).toEqual(["npm test: all green"]);
+		expect(mockAppendCheckpoint).not.toHaveBeenCalled();
 
 		// The real checkpoint tool_use block must appear in event.messages[last].content
 		const lastMsg = result.event.messages[result.event.messages.length - 1];