From 996b82001f3830dd8a63c2cc9d02f0741cfb8e90 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Fri, 15 May 2026 06:26:30 +0200 Subject: [PATCH] fix(auto): keep swarm continue checkpoints actionable --- src/resources/extensions/sf/auto/run-unit.js | 40 +++++++++++--- .../extensions/sf/autonomous-solver.js | 52 +++++++++---------- .../sf/tests/run-unit-via-swarm.test.mjs | 8 +++ 3 files changed, 68 insertions(+), 32 deletions(-) diff --git a/src/resources/extensions/sf/auto/run-unit.js b/src/resources/extensions/sf/auto/run-unit.js index b7c0ba197..c8451ffbc 100644 --- a/src/resources/extensions/sf/auto/run-unit.js +++ b/src/resources/extensions/sf/auto/run-unit.js @@ -272,6 +272,9 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) { const collectedToolCalls = []; let workerSignaledOutcome = null; // "complete" | "progress" | "continue" | "blocked" let workerSummary = null; + let workerCompletedItems = null; + let workerRemainingItems = null; + let workerVerificationEvidence = null; function onEvent(event) { if ( @@ -311,6 +314,15 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) { if (args.summary) { workerSummary = String(args.summary); } + if (Array.isArray(args.completedItems)) { + workerCompletedItems = args.completedItems.map(String); + } + if (Array.isArray(args.remainingItems)) { + workerRemainingItems = args.remainingItems.map(String); + } + if (Array.isArray(args.verificationEvidence)) { + workerVerificationEvidence = args.verificationEvidence.map(String); + } } } } @@ -395,6 +407,25 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) { replyText.length > 500 ? `${replyText.slice(0, 497)}...` : replyText; const summary = (workerSummary ?? truncatedReply) || "Swarm agent completed unit turn."; + const completedItems = + workerCompletedItems && workerCompletedItems.length > 0 + ? workerCompletedItems + : ["Swarm agent processed unit and replied."]; + const remainingItems = + workerRemainingItems && workerRemainingItems.length > 0 + ? workerRemainingItems + : outcome === "complete" + ? [] + : [ + `Continue ${unitType} ${unitId}; swarm worker did not provide an actionable completion checkpoint.`, + ]; + const verificationEvidence = + workerVerificationEvidence && workerVerificationEvidence.length > 0 + ? workerVerificationEvidence + : [ + `swarm-agent:${swarmResult.targetAgent}`, + `replyMessageId:${swarmResult.replyMessageId ?? "unknown"}`, + ]; debugLog("runUnit[swarm]", { phase: "outcome-derived", @@ -431,12 +462,9 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) { unitId, outcome, summary: summary || "Swarm agent completed unit turn.", - completedItems: ["Swarm agent processed unit and replied."], - remainingItems: [], - verificationEvidence: [ - `swarm-agent:${swarmResult.targetAgent}`, - `replyMessageId:${swarmResult.replyMessageId ?? "unknown"}`, - ], + completedItems, + remainingItems, + verificationEvidence, pdd: { purpose: "Checkpoint from swarm agent reply — real outcome when worker called checkpoint tool, conservative fallback otherwise.", diff --git a/src/resources/extensions/sf/autonomous-solver.js b/src/resources/extensions/sf/autonomous-solver.js index 66bc33706..c02d7965e 100644 --- a/src/resources/extensions/sf/autonomous-solver.js +++ b/src/resources/extensions/sf/autonomous-solver.js @@ -1071,6 +1071,32 @@ export function assessAutonomousSolverTurn( maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS, }; } + if ( + (checkpoint.outcome === "continue" || checkpoint.outcome === "decide") && + (checkpoint.remainingItems?.length ?? 0) === 0 + ) { + const repairAttempts = getMissingCheckpointRepairAttempts(state).filter( + (attempt) => Number(attempt.iteration) === Number(state.iteration), + ).length; + if (repairAttempts >= DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS) { + return { + action: "pause", + reason: "solver-empty-continue", + state, + repairAttempts, + maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS, + checkpoint, + }; + } + return { + action: "missing-checkpoint-retry", + reason: "solver-empty-continue", + state, + repairAttempt: repairAttempts + 1, + maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS, + checkpoint, + }; + } // Hard cap on excessive checkpoints within a single iteration if ( (state.checkpointCountThisIteration || 0) >= @@ -1104,32 +1130,6 @@ export function assessAutonomousSolverTurn( checkpoint, }; } - if ( - (checkpoint.outcome === "continue" || checkpoint.outcome === "decide") && - (checkpoint.remainingItems?.length ?? 0) === 0 - ) { - const repairAttempts = getMissingCheckpointRepairAttempts(state).filter( - (attempt) => Number(attempt.iteration) === Number(state.iteration), - ).length; - if (repairAttempts >= DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS) { - return { - action: "pause", - reason: "solver-empty-continue", - state, - repairAttempts, - maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS, - checkpoint, - }; - } - return { - action: "missing-checkpoint-retry", - reason: "solver-empty-continue", - state, - repairAttempt: repairAttempts + 1, - maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS, - checkpoint, - }; - } // No-op detection: a continue with zero work is not real progress if ( (checkpoint.outcome === "continue" || checkpoint.outcome === "decide") && diff --git a/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs b/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs index bd84f2397..29862f32a 100644 --- a/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs +++ b/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs @@ -315,6 +315,8 @@ describe("runUnit — SF_AUTONOMOUS_VIA_SWARM=1 — happy path", () => { expect(params.summary.length).toBeGreaterThan(0); expect(Array.isArray(params.completedItems)).toBe(true); expect(Array.isArray(params.remainingItems)).toBe(true); + expect(params.remainingItems.length).toBeGreaterThan(0); + expect(params.remainingItems[0]).toContain("Continue execute-task synth-chk-1"); expect(Array.isArray(params.verificationEvidence)).toBe(true); }); @@ -890,6 +892,12 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => { expect(mockAppendCheckpoint).toHaveBeenCalledOnce(); const [, params] = mockAppendCheckpoint.mock.calls[0]; expect(params.outcome).toBe("complete"); + expect(params.completedItems).toEqual([ + "feature implemented", + "tests passing", + ]); + expect(params.remainingItems).toEqual([]); + expect(params.verificationEvidence).toEqual(["npm test: all green"]); // The real checkpoint tool_use block must appear in event.messages[last].content const lastMsg = result.event.messages[result.event.messages.length - 1];