fix(auto): cap checkpoint repairs before retries

This commit is contained in:
Mikael Hugo 2026-05-15 10:58:02 +02:00
parent 604ebbf824
commit 7a4a62e244
2 changed files with 45 additions and 28 deletions

View file

@ -1050,6 +1050,21 @@ export function assessAutonomousSolverTurn(
maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS,
};
}
// Hard cap on excessive checkpoints within a single iteration. This check
// must run before repair classifications such as empty/no-op continue; once
// the cap is reached, launching another repair turn only amplifies the loop.
if (
(state.checkpointCountThisIteration || 0) >= MAX_CHECKPOINTS_PER_ITERATION
) {
return {
action: "pause",
reason: "solver-excessive-checkpoints",
state,
checkpoint,
checkpointCount: state.checkpointCountThisIteration,
maxCheckpointCount: MAX_CHECKPOINTS_PER_ITERATION,
};
}
if (
(checkpoint.outcome === "continue" || checkpoint.outcome === "decide") &&
(checkpoint.remainingItems?.length ?? 0) === 0
@ -1076,19 +1091,6 @@ export function assessAutonomousSolverTurn(
checkpoint,
};
}
// Hard cap on excessive checkpoints within a single iteration
if (
(state.checkpointCountThisIteration || 0) >= MAX_CHECKPOINTS_PER_ITERATION
) {
return {
action: "pause",
reason: "solver-excessive-checkpoints",
state,
checkpoint,
checkpointCount: state.checkpointCountThisIteration,
maxCheckpointCount: MAX_CHECKPOINTS_PER_ITERATION,
};
}
if (
state.iteration >= state.maxIterations &&
checkpoint.outcome !== "complete"

View file

@ -417,6 +417,33 @@ describe("autonomous solver", () => {
expect(result.checkpointCount).toBe(5);
});
test("assessAutonomousSolverTurn_excessive_checkpoints_preempts_empty_continue_repair", () => {
  // Regression guard: every checkpoint recorded below is an "empty continue"
  // (outcome "continue" with no remaining items). The assessment is expected
  // to report the excessive-checkpoint pause rather than any repair action.
  const unitType = "plan-slice";
  const unitId = "M002/S01";
  const project = makeProject();
  beginAutonomousSolverIteration(project, unitType, unitId);

  // Five checkpoints in one iteration — presumably the value of
  // MAX_CHECKPOINTS_PER_ITERATION; confirm against the solver module.
  for (let attempt = 1; attempt <= 5; attempt += 1) {
    appendAutonomousSolverCheckpoint(project, {
      unitType,
      unitId,
      outcome: "continue",
      summary: `Empty continue ${attempt}`,
      completedItems: [],
      remainingItems: [],
      verificationEvidence: [],
      pdd: pdd(),
    });
  }

  const { action, reason, checkpointCount } = assessAutonomousSolverTurn(
    project,
    unitType,
    unitId,
  );

  expect(action).toBe("pause");
  expect(reason).toBe("solver-excessive-checkpoints");
  expect(checkpointCount).toBe(5);
});
test("steering_append_consume_is_idempotent", () => {
const project = makeProject();
appendAutonomousSolverSteering(project, "Prefer runtime enforcement.");
@ -1044,11 +1071,7 @@ describe("appendAutonomousSolverCheckpoint sticky identity", () => {
// failed sameUnit() against the orchestrator's identity and re-fired
// repair forever. The active state's identity must be sticky.
const project = makeProject();
beginAutonomousSolverIteration(
project,
"execute-task",
"M001/S04/T02",
);
beginAutonomousSolverIteration(project, "execute-task", "M001/S04/T02");
appendAutonomousSolverCheckpoint(project, {
unitType: "execute-task",
unitId: "parallel-research", // <-- agent guesses wrong
@ -1082,11 +1105,7 @@ describe("appendAutonomousSolverCheckpoint sticky identity", () => {
// orchestrator's identity, assess sees outcome=complete and returns
// action=complete (NOT missing-checkpoint-retry).
const project = makeProject();
beginAutonomousSolverIteration(
project,
"execute-task",
"M001/S04/T02",
);
beginAutonomousSolverIteration(project, "execute-task", "M001/S04/T02");
appendAutonomousSolverCheckpoint(project, {
unitType: "execute-task",
unitId: "wrong-guess",
@ -1107,11 +1126,7 @@ describe("appendAutonomousSolverCheckpoint sticky identity", () => {
test("matching unitId does not flag mismatch", () => {
const project = makeProject();
beginAutonomousSolverIteration(
project,
"execute-task",
"M001/S04/T02",
);
beginAutonomousSolverIteration(project, "execute-task", "M001/S04/T02");
appendAutonomousSolverCheckpoint(project, {
unitType: "execute-task",
unitId: "M001/S04/T02",