fix(auto): keep swarm continue checkpoints actionable

This commit is contained in:
Mikael Hugo 2026-05-15 06:26:30 +02:00
parent 3464db441c
commit 996b82001f
3 changed files with 68 additions and 32 deletions

View file

@ -272,6 +272,9 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
const collectedToolCalls = [];
let workerSignaledOutcome = null; // "complete" | "progress" | "continue" | "blocked"
let workerSummary = null;
// Structured checkpoint lists reported by the worker. Each stays null until
// an array is supplied for it; a null or empty list later selects the
// fallback value when the checkpoint fields are derived.
let workerCompletedItems = null;
let workerRemainingItems = null;
let workerVerificationEvidence = null;
function onEvent(event) {
if (
@ -311,6 +314,15 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
// Capture worker-reported checkpoint fields from `args`
// (NOTE(review): `args` is defined above this excerpt — presumably the
// checkpoint tool call's arguments; confirm).
// A falsy summary is ignored, so any previously captured summary is kept.
if (args.summary) {
workerSummary = String(args.summary);
}
// List fields are accepted only when they are real arrays; every entry is
// coerced to a string. Non-array values leave the current value untouched.
if (Array.isArray(args.completedItems)) {
workerCompletedItems = args.completedItems.map(String);
}
if (Array.isArray(args.remainingItems)) {
workerRemainingItems = args.remainingItems.map(String);
}
if (Array.isArray(args.verificationEvidence)) {
workerVerificationEvidence = args.verificationEvidence.map(String);
}
}
}
}
@ -395,6 +407,25 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
replyText.length > 500 ? `${replyText.slice(0, 497)}...` : replyText;
const summary =
(workerSummary ?? truncatedReply) || "Swarm agent completed unit turn.";
// Completed work: prefer the worker-reported list; a missing or empty list
// falls back to a generic placeholder entry.
const completedItems =
workerCompletedItems && workerCompletedItems.length > 0
? workerCompletedItems
: ["Swarm agent processed unit and replied."];
// Remaining work: prefer the worker-reported list. Without one, a "complete"
// outcome gets an empty list, while every other outcome gets a single
// synthesized "Continue <unitType> <unitId>" item, so a non-complete
// checkpoint never carries zero remaining items.
const remainingItems =
workerRemainingItems && workerRemainingItems.length > 0
? workerRemainingItems
: outcome === "complete"
? []
: [
`Continue ${unitType} ${unitId}; swarm worker did not provide an actionable completion checkpoint.`,
];
// Verification evidence: prefer the worker-reported list; otherwise record
// which swarm agent replied and the reply message id ("unknown" when the id
// is null/undefined).
const verificationEvidence =
workerVerificationEvidence && workerVerificationEvidence.length > 0
? workerVerificationEvidence
: [
`swarm-agent:${swarmResult.targetAgent}`,
`replyMessageId:${swarmResult.replyMessageId ?? "unknown"}`,
];
debugLog("runUnit[swarm]", {
phase: "outcome-derived",
@ -431,12 +462,9 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
unitId,
outcome,
summary: summary || "Swarm agent completed unit turn.",
completedItems: ["Swarm agent processed unit and replied."],
remainingItems: [],
verificationEvidence: [
`swarm-agent:${swarmResult.targetAgent}`,
`replyMessageId:${swarmResult.replyMessageId ?? "unknown"}`,
],
completedItems,
remainingItems,
verificationEvidence,
pdd: {
purpose:
"Checkpoint from swarm agent reply — real outcome when worker called checkpoint tool, conservative fallback otherwise.",

View file

@ -1071,6 +1071,32 @@ export function assessAutonomousSolverTurn(
maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS,
};
}
// Empty-continue guard: a "continue" or "decide" checkpoint whose
// remainingItems is missing or empty names no follow-up work, so it is
// handled as a repairable checkpoint rather than accepted as-is.
if (
(checkpoint.outcome === "continue" || checkpoint.outcome === "decide") &&
(checkpoint.remainingItems?.length ?? 0) === 0
) {
// Count only the repair attempts recorded for the current iteration; both
// sides are coerced with Number() before comparison.
const repairAttempts = getMissingCheckpointRepairAttempts(state).filter(
(attempt) => Number(attempt.iteration) === Number(state.iteration),
).length;
// Repair budget for this iteration is used up: pause instead of retrying.
if (repairAttempts >= DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS) {
return {
action: "pause",
reason: "solver-empty-continue",
state,
repairAttempts,
maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS,
checkpoint,
};
}
// Budget remains: request another repair attempt, reporting the 1-based
// number of the attempt about to be made.
return {
action: "missing-checkpoint-retry",
reason: "solver-empty-continue",
state,
repairAttempt: repairAttempts + 1,
maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS,
checkpoint,
};
}
// Hard cap on excessive checkpoints within a single iteration
if (
(state.checkpointCountThisIteration || 0) >=
@ -1104,32 +1130,6 @@ export function assessAutonomousSolverTurn(
checkpoint,
};
}
if (
(checkpoint.outcome === "continue" || checkpoint.outcome === "decide") &&
(checkpoint.remainingItems?.length ?? 0) === 0
) {
const repairAttempts = getMissingCheckpointRepairAttempts(state).filter(
(attempt) => Number(attempt.iteration) === Number(state.iteration),
).length;
if (repairAttempts >= DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS) {
return {
action: "pause",
reason: "solver-empty-continue",
state,
repairAttempts,
maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS,
checkpoint,
};
}
return {
action: "missing-checkpoint-retry",
reason: "solver-empty-continue",
state,
repairAttempt: repairAttempts + 1,
maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS,
checkpoint,
};
}
// No-op detection: a continue with zero work is not real progress
if (
(checkpoint.outcome === "continue" || checkpoint.outcome === "decide") &&

View file

@ -315,6 +315,8 @@ describe("runUnit — SF_AUTONOMOUS_VIA_SWARM=1 — happy path", () => {
expect(params.summary.length).toBeGreaterThan(0);
expect(Array.isArray(params.completedItems)).toBe(true);
expect(Array.isArray(params.remainingItems)).toBe(true);
expect(params.remainingItems.length).toBeGreaterThan(0);
expect(params.remainingItems[0]).toContain("Continue execute-task synth-chk-1");
expect(Array.isArray(params.verificationEvidence)).toBe(true);
});
@ -890,6 +892,12 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => {
expect(mockAppendCheckpoint).toHaveBeenCalledOnce();
const [, params] = mockAppendCheckpoint.mock.calls[0];
expect(params.outcome).toBe("complete");
expect(params.completedItems).toEqual([
"feature implemented",
"tests passing",
]);
expect(params.remainingItems).toEqual([]);
expect(params.verificationEvidence).toEqual(["npm test: all green"]);
// The real checkpoint tool_use block must appear in event.messages[last].content
const lastMsg = result.event.messages[result.event.messages.length - 1];