fix(auto): re-dispatch on executor refusal instead of pausing
The autonomous solver was designed precisely to handle executor refusals
(per its own docstring: "the solver role MUST stay on a stable, agentic,
refusal-resistant model independent of any per-unit routing choices"),
but the refusal handler short-circuited past it and emitted a `blocked`
checkpoint, which assessAutonomousSolverTurn unconditionally turns into
a `pause` — defeating autonomous mode every time the router selects a
capability-mismatched executor.
The 1h model-block added in 3f2babb5d was the right primitive but had no
consumer: nothing actually re-dispatched the unit after the model was
blocked, so the block only mattered if the operator manually unpaused
and retried.
This change wires the missing consumer:
- Add per-unit `executorRefusalEscalations` counter to solver state plus
a `recordExecutorRefusalEscalation` helper. Counter persists across
iterations of the same unit and resets on unit change.
- On `executor-refused`: block the refusing model and slice-routing entry
(unchanged), file self-feedback (unchanged), then synthesize a
`continue` checkpoint and return `{ action: "continue" }` directly so
the auto loop re-dispatches the unit. selectAndApplyModel will skip
the now-blocked model and pick a higher-tier fallback.
- Bounded by `MAX_EXECUTOR_REFUSAL_ESCALATIONS=3` (at most three re-dispatches
per unit). When the budget is exhausted (an entire fallback chain refused on
the same unit), fall back to the legacy blocked-and-pause path so the operator
can review.
- Bypass `assessAutonomousSolverTurn` on the refusal-continue path
because its no-op detector would (correctly) reject a continue over a
refusal transcript — but here the "no-op" is the whole point: we are
explicitly swapping the routed model.
Tests cover the new state field's init/persistence/reset semantics and
the constant's invariants. Full SF extension suite (1369 tests) passes.
Refs: sf-mp3bm6u0-2fskt8 (now fully addressed, not just AC1)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
288a2a5fd7
commit
5a2618c05d
3 changed files with 215 additions and 43 deletions
|
|
@ -35,8 +35,10 @@ import {
|
|||
classifyExecutorRefusal,
|
||||
consumePendingAutonomousSolverSteering,
|
||||
getConfiguredAutonomousSolverMaxIterations,
|
||||
MAX_EXECUTOR_REFUSAL_ESCALATIONS,
|
||||
readAutonomousSolverState,
|
||||
recordAutonomousSolverMissingCheckpointRetry,
|
||||
recordExecutorRefusalEscalation,
|
||||
} from "../autonomous-solver.js";
|
||||
import { blockModel } from "../blocked-models.js";
|
||||
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
|
||||
|
|
@ -745,8 +747,13 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
? { action: "none" }
|
||||
: { action: "pending" };
|
||||
|
||||
// Refusal short-circuit: when the executor model returned a generic refusal,
|
||||
// synthesize a blocked checkpoint immediately and skip the solver pass.
|
||||
// Refusal handling: when the executor model returned a generic refusal, the
|
||||
// model is capability-mismatched for this unit. Block it (so selectAndApplyModel
|
||||
// excludes it on the next dispatch), evict slice routing, file self-feedback,
|
||||
// and re-dispatch with a tier-escalated model — bounded by MAX_EXECUTOR_REFUSAL_
|
||||
// ESCALATIONS so a fallback chain of refusing models cannot loop forever. Only
|
||||
// when the escalation budget is exhausted do we fall back to a blocked
|
||||
// checkpoint that pauses the loop for operator intervention.
|
||||
if (unitResult.status !== "cancelled" && refusal) {
|
||||
const executorModel =
|
||||
s.currentUnitModel?.provider && s.currentUnitModel?.id
|
||||
|
|
@ -760,9 +767,8 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
} catch {
|
||||
// best-effort
|
||||
}
|
||||
// Temporarily block the refusing model so the router skips it on retry.
|
||||
// This satisfies AC1 of sf-mp3bm6u0-2fskt8: the executor model is
|
||||
// escalated because the blocked model will be excluded from selection.
|
||||
// Block the refusing model so the router skips it on retry. The next
|
||||
// selectAndApplyModel call will pick a higher-tier fallback.
|
||||
try {
|
||||
const refusedProvider = s.currentUnitModel?.provider ?? "";
|
||||
const refusedId = s.currentUnitModel?.id ?? "";
|
||||
|
|
@ -778,46 +784,14 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
} catch {
|
||||
// best-effort — blocking must not break the refusal handler
|
||||
}
|
||||
try {
|
||||
appendAutonomousSolverCheckpoint(s.basePath, {
|
||||
unitType,
|
||||
unitId,
|
||||
outcome: "blocked",
|
||||
summary: `Executor (${executorModel}) refused the task. Pattern: ${refusal.pattern}. The model has been temporarily blocked and will be skipped on retry; escalate the executor model or unblock this unit manually.`,
|
||||
completedItems: [],
|
||||
remainingItems: [
|
||||
`Re-run ${unitType} ${unitId} with a more capable executor model — current routing selected an incapable model.`,
|
||||
],
|
||||
verificationEvidence: [
|
||||
`executor-refusal-pattern=${refusal.pattern}`,
|
||||
`executor-model=${executorModel}`,
|
||||
],
|
||||
blockerReason: `executor-refused (${refusal.pattern})`,
|
||||
pdd: {
|
||||
purpose:
|
||||
"Surface executor refusals as protocol-level blockers instead of synthesizing fake progress.",
|
||||
consumer: "autonomous loop pause-handler",
|
||||
contract:
|
||||
"On `executor-refused`, the loop pauses and self-feedback is filed; the operator must escalate the executor model.",
|
||||
failureBoundary:
|
||||
"If the operator does not escalate, the same refusal will recur on next dispatch.",
|
||||
evidence: "classifyExecutorRefusal matched a refusal pattern",
|
||||
nonGoals:
|
||||
"This does not retry the unit automatically — capability mismatches require operator judgement (or a future automatic escalation policy).",
|
||||
invariants: "Refusal never silently synthesizes a continue.",
|
||||
assumptions:
|
||||
"The refusal pattern set in classifyExecutorRefusal is conservative — false positives are rare and require operator review.",
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
// If synthesis fails, fall through to solver pass
|
||||
}
|
||||
// File self-feedback for observability (operator-visible signal that
|
||||
// this unit type is being routed to capability-mismatched models).
|
||||
try {
|
||||
const feedback = recordSelfFeedback(
|
||||
{
|
||||
kind: "executor-refused",
|
||||
severity: "high",
|
||||
summary: `Executor ${executorModel} refused ${unitType} ${unitId} with pattern ${refusal.pattern}; loop paused to prevent fake-progress synthesis.`,
|
||||
summary: `Executor ${executorModel} refused ${unitType} ${unitId} with pattern ${refusal.pattern}; model blocked and re-dispatching with tier escalation.`,
|
||||
evidence: [
|
||||
`unit=${unitType} ${unitId}`,
|
||||
`executor=${executorModel}`,
|
||||
|
|
@ -826,9 +800,9 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
refusal.evidence ?? "",
|
||||
].join("\n"),
|
||||
suggestedFix:
|
||||
"Escalate the executor model for this unit (or unit type) — the currently routed model lacks the agentic capabilities required. Long-term: separate the executor and autonomous-solver roles per ADR-0079 and pin the solver to a stable agentic model.",
|
||||
"Routing repeatedly selects a capability-mismatched executor for this unit type. Update the router's tier-floor for this unit type so the refusing tier is excluded by default, or add the refusing model to a permanent block list.",
|
||||
acceptanceCriteria: [
|
||||
"Executor model for this unit type is escalated to a model that passes the refusal-resistant tier.",
|
||||
"Router's effective tier-floor for this unit type excludes the refusing model class without requiring a runtime block.",
|
||||
"Refusal pattern is added to classifyExecutorRefusal if a novel phrasing slipped through.",
|
||||
],
|
||||
occurredIn: { unitType, unitId },
|
||||
|
|
@ -853,8 +827,120 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
} catch {
|
||||
// self-feedback is observability; never block loop progression on it
|
||||
}
|
||||
// Bounded re-dispatch: if the per-unit escalation budget is not yet
|
||||
// exhausted, synthesize a `continue` checkpoint and return the
|
||||
// re-dispatch action directly. We bypass assessAutonomousSolverTurn
|
||||
// here because the no-op detector would (correctly) reject a continue
|
||||
// over a refusal transcript — but in this case the "no-op" is the whole
|
||||
// point, since we are explicitly changing the routed model on retry.
|
||||
const escalation = recordExecutorRefusalEscalation(
|
||||
s.basePath,
|
||||
unitType,
|
||||
unitId,
|
||||
);
|
||||
if (escalation <= MAX_EXECUTOR_REFUSAL_ESCALATIONS) {
|
||||
try {
|
||||
appendAutonomousSolverCheckpoint(s.basePath, {
|
||||
unitType,
|
||||
unitId,
|
||||
outcome: "continue",
|
||||
summary: `Executor (${executorModel}) refused. Pattern: ${refusal.pattern}. Model blocked; re-dispatching with model excluded (escalation ${escalation}/${MAX_EXECUTOR_REFUSAL_ESCALATIONS}).`,
|
||||
completedItems: [],
|
||||
remainingItems: [
|
||||
`Re-dispatch ${unitType} ${unitId}; refusing model is blocked so selectAndApplyModel will pick a higher-tier fallback.`,
|
||||
],
|
||||
verificationEvidence: [
|
||||
`executor-refusal-pattern=${refusal.pattern}`,
|
||||
`executor-model=${executorModel}`,
|
||||
`refusal-escalation=${escalation}/${MAX_EXECUTOR_REFUSAL_ESCALATIONS}`,
|
||||
],
|
||||
pdd: {
|
||||
purpose:
|
||||
"Auto-escalate capability-mismatched executor selections by blocking the refusing model and re-dispatching, so SF completes work without operator intervention when a higher-tier model exists in the fallback chain.",
|
||||
consumer: "autonomous loop continue handler",
|
||||
contract:
|
||||
"On `executor-refused`, block the refusing model and emit `continue` so the loop re-dispatches with the blocked model excluded — bounded by MAX_EXECUTOR_REFUSAL_ESCALATIONS to prevent runaway loops over an all-refusing fallback chain.",
|
||||
failureBoundary:
|
||||
"After MAX_EXECUTOR_REFUSAL_ESCALATIONS refusals on the same unit, fall back to the legacy blocked-and-pause path so the operator can intervene.",
|
||||
evidence:
|
||||
"classifyExecutorRefusal matched a refusal pattern; the responsible model is now in blocked-models.json with a 1-hour TTL.",
|
||||
nonGoals:
|
||||
"This does not change the router's tier-floor — repeated refusals across units indicate the router still needs tuning (filed via self-feedback).",
|
||||
invariants:
|
||||
"The refusing model is never silently retried; it is always blocked before re-dispatch.",
|
||||
assumptions:
|
||||
"The fallback chain in effectiveModelConfig contains at least one higher-tier model that does not also refuse.",
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
// Checkpoint synthesis is best-effort: if it fails we still journal, notify,
|
||||
// and return `continue` below; this branch never reaches the blocked path.
|
||||
}
|
||||
deps.emitJournalEvent({
|
||||
ts: new Date().toISOString(),
|
||||
flowId: ic.flowId,
|
||||
seq: ic.nextSeq(),
|
||||
eventType: "executor-refused-redispatch",
|
||||
data: {
|
||||
unitType,
|
||||
unitId,
|
||||
executorModel,
|
||||
pattern: refusal.pattern,
|
||||
escalation,
|
||||
maxEscalations: MAX_EXECUTOR_REFUSAL_ESCALATIONS,
|
||||
},
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`Executor ${executorModel} refused ${unitType} ${unitId} (${refusal.pattern}); blocked and re-dispatching (escalation ${escalation}/${MAX_EXECUTOR_REFUSAL_ESCALATIONS}).`,
|
||||
"warning",
|
||||
);
|
||||
return {
|
||||
action: "continue",
|
||||
data: {
|
||||
unitStartedAt: s.currentUnit?.startedAt,
|
||||
requestDispatchedAt: unitResult.requestDispatchedAt,
|
||||
},
|
||||
};
|
||||
}
|
||||
// Escalation budget exhausted: emit the legacy blocked checkpoint and
|
||||
// let the existing pause path take over so the operator can intervene.
|
||||
try {
|
||||
appendAutonomousSolverCheckpoint(s.basePath, {
|
||||
unitType,
|
||||
unitId,
|
||||
outcome: "blocked",
|
||||
summary: `Executor (${executorModel}) refused the task. Pattern: ${refusal.pattern}. Refusal-escalation budget exhausted (${escalation}/${MAX_EXECUTOR_REFUSAL_ESCALATIONS}) — every model tried in the fallback chain refused. Operator must escalate routing or add a permanent block.`,
|
||||
completedItems: [],
|
||||
remainingItems: [
|
||||
`Re-run ${unitType} ${unitId} with a more capable executor model — the entire ${MAX_EXECUTOR_REFUSAL_ESCALATIONS}-step fallback chain refused.`,
|
||||
],
|
||||
verificationEvidence: [
|
||||
`executor-refusal-pattern=${refusal.pattern}`,
|
||||
`executor-model=${executorModel}`,
|
||||
`refusal-escalations-exhausted=${escalation}`,
|
||||
],
|
||||
blockerReason: `executor-refused-budget-exhausted (${refusal.pattern})`,
|
||||
pdd: {
|
||||
purpose:
|
||||
"Surface executor refusals as protocol-level blockers when bounded auto-escalation has been exhausted.",
|
||||
consumer: "autonomous loop pause-handler",
|
||||
contract:
|
||||
"After MAX_EXECUTOR_REFUSAL_ESCALATIONS refusals on the same unit, pause the loop and require operator intervention.",
|
||||
failureBoundary:
|
||||
"If the operator does not escalate, the same refusal will recur on next dispatch.",
|
||||
evidence: `${escalation} consecutive executor refusals on this unit`,
|
||||
nonGoals:
|
||||
"This does not retry the unit automatically beyond the budget — capability mismatches that defeat the entire fallback chain require operator judgement.",
|
||||
invariants: "Refusal never silently synthesizes a continue.",
|
||||
assumptions:
|
||||
"The refusal pattern set in classifyExecutorRefusal is conservative — false positives are rare and require operator review.",
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
// If synthesis fails, fall through to solver pass
|
||||
}
|
||||
ctx.ui.notify(
|
||||
`Executor ${executorModel} refused ${unitType} ${unitId} (${refusal.pattern}); autonomous loop pausing instead of synthesizing fake progress. See SELF-FEEDBACK.md for escalation guidance.`,
|
||||
`Executor refused ${unitType} ${unitId} after ${MAX_EXECUTOR_REFUSAL_ESCALATIONS} tier escalations; pausing for operator review. See SELF-FEEDBACK.md.`,
|
||||
"error",
|
||||
);
|
||||
solverAssessment = assessAutonomousSolverTurn(s.basePath, unitType, unitId);
|
||||
|
|
|
|||
|
|
@ -269,11 +269,51 @@ export function beginAutonomousSolverIteration(
|
|||
: [],
|
||||
// Safety cap: how many checkpoints have been written this iteration
|
||||
checkpointCountThisIteration: 0,
|
||||
// Per-unit budget for executor-refusal-driven re-dispatches. Reset when
|
||||
// the unit changes; persists across iterations of the same unit so a
|
||||
// resumed run does not silently get a fresh budget.
|
||||
executorRefusalEscalations: sameUnit(existing, unitType, unitId)
|
||||
? Number(existing.executorRefusalEscalations) || 0
|
||||
: 0,
|
||||
};
|
||||
writeState(basePath, state);
|
||||
return state;
|
||||
}
|
||||
|
||||
/**
|
||||
* Maximum number of executor-refusal-driven re-dispatches for a single unit
|
||||
* before the loop falls back to the legacy blocked-and-pause behavior. Three
|
||||
* re-dispatches are permitted per unit; the fourth refusal pauses the loop. That
|
||||
* covers a typical tier ladder depth without enabling runaway loops if every
|
||||
* fallback model also refuses.
|
||||
*/
|
||||
export const MAX_EXECUTOR_REFUSAL_ESCALATIONS = 3;
|
||||
|
||||
/**
|
||||
* Increment the per-unit executor-refusal escalation counter and return the
|
||||
* new count.
|
||||
*
|
||||
* Purpose: when classifyExecutorRefusal fires, the refusal handler blocks the
|
||||
* refusing model and re-dispatches the unit so selectAndApplyModel picks a
|
||||
* higher-tier alternative. This counter bounds that retry loop so an entire
|
||||
* fallback chain of refusing models cannot loop forever — once the budget is
|
||||
* exhausted, the unit pauses for operator intervention via the legacy
|
||||
* blocked-checkpoint path.
|
||||
*
|
||||
* Consumer: runUnitPhase refusal branch in auto/phases-unit.js.
|
||||
*/
|
||||
export function recordExecutorRefusalEscalation(basePath, unitType, unitId) {
  // Load the persisted solver state for this project.
  const persisted = readJson(statePath(basePath));

  // The counter is only bumped when the persisted state matches the given
  // unit; otherwise nothing is written and 0 is reported.
  const tracksThisUnit = sameUnit(persisted, unitType, unitId);
  if (!tracksThisUnit) {
    return 0;
  }

  // A missing or non-numeric stored counter is treated as zero.
  const previousCount = Number(persisted.executorRefusalEscalations) || 0;
  const escalationCount = previousCount + 1;

  // Persist the incremented counter together with a fresh update timestamp.
  writeState(basePath, {
    ...persisted,
    executorRefusalEscalations: escalationCount,
    updatedAt: nowIso(),
  });

  return escalationCount;
}
|
||||
|
||||
/**
|
||||
* Build the PDD autonomous solver prompt block appended to unit prompts.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -18,9 +18,11 @@ import {
|
|||
getConfiguredAutonomousSolverMaxIterations,
|
||||
getSolverPhase,
|
||||
isNoOpExecutorTranscript,
|
||||
MAX_EXECUTOR_REFUSAL_ESCALATIONS,
|
||||
readAutonomousSolverState,
|
||||
readLatestAutonomousSolverCheckpoint,
|
||||
recordAutonomousSolverMissingCheckpointRetry,
|
||||
recordExecutorRefusalEscalation,
|
||||
} from "../autonomous-solver.js";
|
||||
|
||||
let tempDirs = [];
|
||||
|
|
@ -84,6 +86,50 @@ describe("autonomous solver", () => {
|
|||
expect(next.iteration).toBe(1);
|
||||
});
|
||||
|
||||
test("recordExecutorRefusalEscalation_increments_per_unit_and_resets_on_new_unit", () => {
  const project = makeProject();

  // Small local helpers so each step below reads as a single assertion.
  const escalate = (unitId) =>
    recordExecutorRefusalEscalation(project, "execute-task", unitId);
  const persistedCount = () =>
    readAutonomousSolverState(project).executorRefusalEscalations;

  beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");

  // Each recorded refusal for the current unit returns the incremented count,
  // and the count is written to solver state.
  expect(escalate("M001/S01/T01")).toBe(1);
  expect(escalate("M001/S01/T01")).toBe(2);
  expect(persistedCount()).toBe(2);

  // Beginning another iteration of the same unit keeps the counter: the
  // refusal budget is per-unit, not per-iteration.
  beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
  expect(persistedCount()).toBe(2);

  // Beginning an iteration for a different unit resets the counter, so the
  // new unit starts with a fresh budget.
  beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T02");
  expect(persistedCount()).toBe(0);

  // Recording against a unit id that does not match the current state
  // returns 0 and leaves the stored counter unchanged.
  expect(escalate("M001/S01/T01")).toBe(0);
  expect(persistedCount()).toBe(0);
});
|
||||
|
||||
test("MAX_EXECUTOR_REFUSAL_ESCALATIONS_is_a_positive_integer", () => {
  const budget = MAX_EXECUTOR_REFUSAL_ESCALATIONS;

  // The budget is used in numeric comparisons, so it must be an integer.
  expect(Number.isInteger(budget)).toBe(true);
  // A budget of zero or less would mean the refusal handler never
  // re-dispatches.
  expect(budget).toBeGreaterThan(0);
});
|
||||
|
||||
test("appendAutonomousSolverCheckpoint_writes_pdd_projection_and_history", () => {
|
||||
const project = makeProject();
|
||||
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue