fix(auto): avoid duplicate swarm checkpoints
This commit is contained in:
parent
7a4a62e244
commit
5e478d6506
2 changed files with 48 additions and 49 deletions
|
|
@@ -619,45 +619,53 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
|
|||
];
|
||||
}
|
||||
|
||||
try {
|
||||
appendAutonomousSolverCheckpoint(basePath, {
|
||||
if (!hasCheckpointCall) {
|
||||
try {
|
||||
appendAutonomousSolverCheckpoint(basePath, {
|
||||
unitType,
|
||||
unitId,
|
||||
outcome,
|
||||
summary: summary || "Swarm agent completed unit turn.",
|
||||
completedItems,
|
||||
remainingItems,
|
||||
verificationEvidence,
|
||||
pdd: {
|
||||
purpose:
|
||||
"Synthetic checkpoint from swarm agent reply when the worker did not call the checkpoint tool.",
|
||||
consumer: "phases-unit.js assessAutonomousSolverTurn",
|
||||
contract:
|
||||
"Falls back to 'continue' so the loop re-evaluates rather than incorrectly completing.",
|
||||
failureBoundary:
|
||||
"appendAutonomousSolverCheckpoint failure is swallowed — the loop will repair via its own missing-checkpoint retry path.",
|
||||
evidence: `swarm-agent ${swarmResult.targetAgent} replied with ${replyText.length} chars; workerSignaledOutcome=${workerSignaledOutcome ?? "null"}; collectedToolCalls=${collectedToolCalls.length}`,
|
||||
nonGoals: "Does not synthesize completion.",
|
||||
invariants:
|
||||
"Synthetic checkpoints are only written when the worker emitted no real checkpoint call.",
|
||||
assumptions:
|
||||
"The swarm agent processed the unit prompt and returned a non-empty reply.",
|
||||
},
|
||||
});
|
||||
debugLog("runUnit[swarm]", {
|
||||
phase: "synthesized-checkpoint",
|
||||
unitType,
|
||||
unitId,
|
||||
outcome,
|
||||
});
|
||||
} catch (cpErr) {
|
||||
// Fail-open: if checkpoint synthesis fails, the repair loop will handle it.
|
||||
debugLog("runUnit[swarm]", {
|
||||
phase: "synthesized-checkpoint-error",
|
||||
unitType,
|
||||
unitId,
|
||||
error: getErrorMessage(cpErr),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
debugLog("runUnit[swarm]", {
|
||||
phase: "real-checkpoint-observed",
|
||||
unitType,
|
||||
unitId,
|
||||
outcome,
|
||||
summary: summary || "Swarm agent completed unit turn.",
|
||||
completedItems,
|
||||
remainingItems,
|
||||
verificationEvidence,
|
||||
pdd: {
|
||||
purpose:
|
||||
"Checkpoint from swarm agent reply — real outcome when worker called checkpoint tool, conservative fallback otherwise.",
|
||||
consumer: "phases-unit.js assessAutonomousSolverTurn",
|
||||
contract:
|
||||
"outcome reflects the worker's checkpoint call when available; falls back to 'continue' so the loop re-evaluates rather than incorrectly completing.",
|
||||
failureBoundary:
|
||||
"appendAutonomousSolverCheckpoint failure is swallowed — the loop will repair via its own missing-checkpoint retry path.",
|
||||
evidence: `swarm-agent ${swarmResult.targetAgent} replied with ${replyText.length} chars; workerSignaledOutcome=${workerSignaledOutcome ?? "null"}; collectedToolCalls=${collectedToolCalls.length}`,
|
||||
nonGoals:
|
||||
"Does not synthesize fake tool calls — uses real ones when available.",
|
||||
invariants:
|
||||
"Never claims outcome=complete unless the worker explicitly called checkpoint with outcome='complete'.",
|
||||
assumptions:
|
||||
"The swarm agent processed the unit prompt and returned a non-empty reply.",
|
||||
},
|
||||
});
|
||||
debugLog("runUnit[swarm]", {
|
||||
phase: "synthesized-checkpoint",
|
||||
unitType,
|
||||
unitId,
|
||||
outcome,
|
||||
});
|
||||
} catch (cpErr) {
|
||||
// Fail-open: if checkpoint synthesis fails, the repair loop will handle it.
|
||||
debugLog("runUnit[swarm]", {
|
||||
phase: "synthesized-checkpoint-error",
|
||||
unitType,
|
||||
unitId,
|
||||
error: getErrorMessage(cpErr),
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -985,10 +985,10 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => {
|
|||
expect(result.swarmToolCallCount).toBe(0);
|
||||
});
|
||||
|
||||
test("checkpoint tool call with outcome=complete → appendCheckpoint called with outcome=complete", async () => {
|
||||
test("checkpoint tool call with outcome=complete relies on the real checkpoint write", async () => {
|
||||
// The canonical completion detection: when the worker calls checkpoint with
|
||||
// outcome='complete', runUnitViaSwarm should pass outcome='complete' to
|
||||
// appendAutonomousSolverCheckpoint (not hardcode 'continue').
|
||||
// outcome='complete', runUnitViaSwarm must not append a duplicate parent
|
||||
// checkpoint. The tool execution already updated solver state.
|
||||
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
|
||||
|
||||
mockWithToolCallEvents([
|
||||
|
|
@@ -1030,16 +1030,7 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => {
|
|||
|
||||
expect(result.status).toBe("completed");
|
||||
|
||||
// appendAutonomousSolverCheckpoint must have been called with outcome='complete'
|
||||
expect(mockAppendCheckpoint).toHaveBeenCalledOnce();
|
||||
const [, params] = mockAppendCheckpoint.mock.calls[0];
|
||||
expect(params.outcome).toBe("complete");
|
||||
expect(params.completedItems).toEqual([
|
||||
"feature implemented",
|
||||
"tests passing",
|
||||
]);
|
||||
expect(params.remainingItems).toEqual([]);
|
||||
expect(params.verificationEvidence).toEqual(["npm test: all green"]);
|
||||
expect(mockAppendCheckpoint).not.toHaveBeenCalled();
|
||||
|
||||
// The real checkpoint tool_use block must appear in event.messages[last].content
|
||||
const lastMsg = result.event.messages[result.event.messages.length - 1];
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue