fix(auto): re-dispatch on executor refusal instead of pausing

The autonomous solver was designed precisely to handle executor refusals (per its own docstring: "the solver role MUST stay on a stable, agentic, refusal-resistant model independent of any per-unit routing choices"), but the refusal handler short-circuited past it and emitted a `blocked` checkpoint, which assessAutonomousSolverTurn unconditionally turns into a `pause` — defeating autonomous mode every time the router selects a capability-mismatched executor. The 1h model-block added in 3f2babb5d was the right primitive but had no consumer: nothing actually re-dispatched the unit after the model was blocked, so the block only mattered if the operator manually unpaused and retried. This change wires the missing consumer: - Add per-unit `executorRefusalEscalations` counter to solver state plus a `recordExecutorRefusalEscalation` helper. Counter persists across iterations of the same unit and resets on unit change. - On `executor-refused`: block the refusing model and slice-routing entry (unchanged), file self-feedback (unchanged), then synthesize a `continue` checkpoint and return `{ action: "continue" }` directly so the auto loop re-dispatches the unit. selectAndApplyModel will skip the now-blocked model and pick a higher-tier fallback. - Bounded by `MAX_EXECUTOR_REFUSAL_ESCALATIONS=3`. When the budget is exhausted (an entire fallback chain refused on the same unit), fall back to the legacy blocked-and-pause path so the operator can review. - Bypass `assessAutonomousSolverTurn` on the refusal-continue path because its no-op detector would (correctly) reject a continue over a refusal transcript — but here the "no-op" is the whole point: we are explicitly swapping the routed model. Tests cover the new state field's init/persistence/reset semantics and the constant's invariants. Full SF extension suite (1369 tests) passes. Refs: sf-mp3bm6u0-2fskt8 (now fully addressed, not just AC1) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 21:49:51 +02:00 · 2026-05-13 21:49:51 +02:00 · 5a2618c05d
commit 5a2618c05d
parent 288a2a5fd7
3 changed files with 215 additions and 43 deletions
--- a/src/resources/extensions/sf/auto/phases-unit.js
+++ b/src/resources/extensions/sf/auto/phases-unit.js
@ -35,8 +35,10 @@ import {
 	classifyExecutorRefusal,
 	consumePendingAutonomousSolverSteering,
 	getConfiguredAutonomousSolverMaxIterations,
+	MAX_EXECUTOR_REFUSAL_ESCALATIONS,
 	readAutonomousSolverState,
 	recordAutonomousSolverMissingCheckpointRetry,
+	recordExecutorRefusalEscalation,
 } from "../autonomous-solver.js";
 import { blockModel } from "../blocked-models.js";
 import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
@ -745,8 +747,13 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
 			? { action: "none" }
 			: { action: "pending" };

-	// Refusal short-circuit: when the executor model returned a generic refusal,
-	// synthesize a blocked checkpoint immediately and skip the solver pass.
+	// Refusal handling: when the executor model returned a generic refusal, the
+	// model is capability-mismatched for this unit. Block it (so selectAndApplyModel
+	// excludes it on the next dispatch), evict slice routing, file self-feedback,
+	// and re-dispatch with a tier-escalated model — bounded by
+	// MAX_EXECUTOR_REFUSAL_ESCALATIONS so a fallback chain of refusing models
+	// cannot loop forever. Only when the escalation budget is exhausted do we fall
+	// back to a blocked checkpoint that pauses the loop for operator intervention.
 	if (unitResult.status !== "cancelled" && refusal) {
 		const executorModel =
 			s.currentUnitModel?.provider && s.currentUnitModel?.id
@ -760,9 +767,8 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
 		} catch {
 			// best-effort
 		}
-		// Temporarily block the refusing model so the router skips it on retry.
-		// This satisfies AC1 of sf-mp3bm6u0-2fskt8: the executor model is
-		// escalated because the blocked model will be excluded from selection.
+		// Block the refusing model so the router skips it on retry. The next
+		// selectAndApplyModel call will pick a higher-tier fallback.
 		try {
 			const refusedProvider = s.currentUnitModel?.provider ?? "";
 			const refusedId = s.currentUnitModel?.id ?? "";
@ -778,46 +784,14 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
 		} catch {
 			// best-effort — blocking must not break the refusal handler
 		}
-		try {
-			appendAutonomousSolverCheckpoint(s.basePath, {
-				unitType,
-				unitId,
-				outcome: "blocked",
-				summary: `Executor (${executorModel}) refused the task. Pattern: ${refusal.pattern}. The model has been temporarily blocked and will be skipped on retry; escalate the executor model or unblock this unit manually.`,
-				completedItems: [],
-				remainingItems: [
-					`Re-run ${unitType} ${unitId} with a more capable executor model — current routing selected an incapable model.`,
-				],
-				verificationEvidence: [
-					`executor-refusal-pattern=${refusal.pattern}`,
-					`executor-model=${executorModel}`,
-				],
-				blockerReason: `executor-refused (${refusal.pattern})`,
-				pdd: {
-					purpose:
-						"Surface executor refusals as protocol-level blockers instead of synthesizing fake progress.",
-					consumer: "autonomous loop pause-handler",
-					contract:
-						"On `executor-refused`, the loop pauses and self-feedback is filed; the operator must escalate the executor model.",
-					failureBoundary:
-						"If the operator does not escalate, the same refusal will recur on next dispatch.",
-					evidence: "classifyExecutorRefusal matched a refusal pattern",
-					nonGoals:
-						"This does not retry the unit automatically — capability mismatches require operator judgement (or a future automatic escalation policy).",
-					invariants: "Refusal never silently synthesizes a continue.",
-					assumptions:
-						"The refusal pattern set in classifyExecutorRefusal is conservative — false positives are rare and require operator review.",
-				},
-			});
-		} catch {
-			// If synthesis fails, fall through to solver pass
-		}
+		// File self-feedback for observability (operator-visible signal that
+		// this unit type is being routed to capability-mismatched models).
 		try {
 			const feedback = recordSelfFeedback(
 				{
 					kind: "executor-refused",
 					severity: "high",
-					summary: `Executor ${executorModel} refused ${unitType} ${unitId} with pattern ${refusal.pattern}; loop paused to prevent fake-progress synthesis.`,
+					summary: `Executor ${executorModel} refused ${unitType} ${unitId} with pattern ${refusal.pattern}; model blocked and re-dispatching with tier escalation.`,
 					evidence: [
 						`unit=${unitType} ${unitId}`,
 						`executor=${executorModel}`,
@ -826,9 +800,9 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
 						refusal.evidence ?? "",
 					].join("\n"),
 					suggestedFix:
-						"Escalate the executor model for this unit (or unit type) — the currently routed model lacks the agentic capabilities required. Long-term: separate the executor and autonomous-solver roles per ADR-0079 and pin the solver to a stable agentic model.",
+						"Routing repeatedly selects a capability-mismatched executor for this unit type. Update the router's tier-floor for this unit type so the refusing tier is excluded by default, or add the refusing model to a permanent block list.",
 					acceptanceCriteria: [
-						"Executor model for this unit type is escalated to a model that passes the refusal-resistant tier.",
+						"Router's effective tier-floor for this unit type excludes the refusing model class without requiring a runtime block.",
 						"Refusal pattern is added to classifyExecutorRefusal if a novel phrasing slipped through.",
 					],
 					occurredIn: { unitType, unitId },
@ -853,8 +827,120 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
 		} catch {
 			// self-feedback is observability; never block loop progression on it
 		}
+		// Bounded re-dispatch: if the per-unit escalation budget is not yet
+		// exhausted, synthesize a `continue` checkpoint and return the
+		// re-dispatch action directly. We bypass assessAutonomousSolverTurn
+		// here because the no-op detector would (correctly) reject a continue
+		// over a refusal transcript — but in this case the "no-op" is the whole
+		// point, since we are explicitly changing the routed model on retry.
+		const escalation = recordExecutorRefusalEscalation(
+			s.basePath,
+			unitType,
+			unitId,
+		);
+		if (escalation <= MAX_EXECUTOR_REFUSAL_ESCALATIONS) {
+			try {
+				appendAutonomousSolverCheckpoint(s.basePath, {
+					unitType,
+					unitId,
+					outcome: "continue",
+					summary: `Executor (${executorModel}) refused. Pattern: ${refusal.pattern}. Model blocked; re-dispatching with model excluded (escalation ${escalation}/${MAX_EXECUTOR_REFUSAL_ESCALATIONS}).`,
+					completedItems: [],
+					remainingItems: [
+						`Re-dispatch ${unitType} ${unitId}; refusing model is blocked so selectAndApplyModel will pick a higher-tier fallback.`,
+					],
+					verificationEvidence: [
+						`executor-refusal-pattern=${refusal.pattern}`,
+						`executor-model=${executorModel}`,
+						`refusal-escalation=${escalation}/${MAX_EXECUTOR_REFUSAL_ESCALATIONS}`,
+					],
+					pdd: {
+						purpose:
+							"Auto-escalate capability-mismatched executor selections by blocking the refusing model and re-dispatching, so SF completes work without operator intervention when a higher-tier model exists in the fallback chain.",
+						consumer: "autonomous loop continue handler",
+						contract:
+							"On `executor-refused`, block the refusing model and emit `continue` so the loop re-dispatches with the blocked model excluded — bounded by MAX_EXECUTOR_REFUSAL_ESCALATIONS to prevent runaway loops over an all-refusing fallback chain.",
+						failureBoundary:
+							"After MAX_EXECUTOR_REFUSAL_ESCALATIONS refusals on the same unit, fall back to the legacy blocked-and-pause path so the operator can intervene.",
+						evidence:
+							"classifyExecutorRefusal matched a refusal pattern; the responsible model is now in blocked-models.json with a 1-hour TTL.",
+						nonGoals:
+							"This does not change the router's tier-floor — repeated refusals across units indicate the router still needs tuning (filed via self-feedback).",
+						invariants:
+							"The refusing model is never silently retried; it is always blocked before re-dispatch.",
+						assumptions:
+							"The fallback chain in effectiveModelConfig contains at least one higher-tier model that does not also refuse.",
+					},
+				});
+			} catch {
+				// Checkpoint synthesis is best-effort: on failure we still emit the
+				// journal event, notify, and return `continue` below.
+			}
+			deps.emitJournalEvent({
+				ts: new Date().toISOString(),
+				flowId: ic.flowId,
+				seq: ic.nextSeq(),
+				eventType: "executor-refused-redispatch",
+				data: {
+					unitType,
+					unitId,
+					executorModel,
+					pattern: refusal.pattern,
+					escalation,
+					maxEscalations: MAX_EXECUTOR_REFUSAL_ESCALATIONS,
+				},
+			});
+			ctx.ui.notify(
+				`Executor ${executorModel} refused ${unitType} ${unitId} (${refusal.pattern}); blocked and re-dispatching (escalation ${escalation}/${MAX_EXECUTOR_REFUSAL_ESCALATIONS}).`,
+				"warning",
+			);
+			return {
+				action: "continue",
+				data: {
+					unitStartedAt: s.currentUnit?.startedAt,
+					requestDispatchedAt: unitResult.requestDispatchedAt,
+				},
+			};
+		}
+		// Escalation budget exhausted: emit the legacy blocked checkpoint and
+		// let the existing pause path take over so the operator can intervene.
+		try {
+			appendAutonomousSolverCheckpoint(s.basePath, {
+				unitType,
+				unitId,
+				outcome: "blocked",
+				summary: `Executor (${executorModel}) refused the task. Pattern: ${refusal.pattern}. Refusal-escalation budget exhausted (${escalation}/${MAX_EXECUTOR_REFUSAL_ESCALATIONS}) — every model tried in the fallback chain refused. Operator must escalate routing or add a permanent block.`,
+				completedItems: [],
+				remainingItems: [
+					`Re-run ${unitType} ${unitId} with a more capable executor model — the entire ${MAX_EXECUTOR_REFUSAL_ESCALATIONS}-step fallback chain refused.`,
+				],
+				verificationEvidence: [
+					`executor-refusal-pattern=${refusal.pattern}`,
+					`executor-model=${executorModel}`,
+					`refusal-escalations-exhausted=${escalation}`,
+				],
+				blockerReason: `executor-refused-budget-exhausted (${refusal.pattern})`,
+				pdd: {
+					purpose:
+						"Surface executor refusals as protocol-level blockers when bounded auto-escalation has been exhausted.",
+					consumer: "autonomous loop pause-handler",
+					contract:
+						"After MAX_EXECUTOR_REFUSAL_ESCALATIONS refusals on the same unit, pause the loop and require operator intervention.",
+					failureBoundary:
+						"If the operator does not escalate, the same refusal will recur on next dispatch.",
+					evidence: `${escalation} consecutive executor refusals on this unit`,
+					nonGoals:
+						"This does not retry the unit automatically beyond the budget — capability mismatches that defeat the entire fallback chain require operator judgement.",
+					invariants: "Refusal never silently synthesizes a continue.",
+					assumptions:
+						"The refusal pattern set in classifyExecutorRefusal is conservative — false positives are rare and require operator review.",
+				},
+			});
+		} catch {
+			// If synthesis fails, fall through to solver pass
+		}
 		ctx.ui.notify(
-			`Executor ${executorModel} refused ${unitType} ${unitId} (${refusal.pattern}); autonomous loop pausing instead of synthesizing fake progress. See SELF-FEEDBACK.md for escalation guidance.`,
+			`Executor refused ${unitType} ${unitId} after ${MAX_EXECUTOR_REFUSAL_ESCALATIONS} tier escalations; pausing for operator review. See SELF-FEEDBACK.md.`,
 			"error",
 		);
 		solverAssessment = assessAutonomousSolverTurn(s.basePath, unitType, unitId);
--- a/src/resources/extensions/sf/autonomous-solver.js
+++ b/src/resources/extensions/sf/autonomous-solver.js
@ -269,11 +269,51 @@ export function beginAutonomousSolverIteration(
 			: [],
 		// Safety cap: how many checkpoints have been written this iteration
 		checkpointCountThisIteration: 0,
+		// Per-unit budget for executor-refusal-driven re-dispatches. Reset when
+		// the unit changes; persists across iterations of the same unit so a
+		// resumed run does not silently get a fresh budget.
+		executorRefusalEscalations: sameUnit(existing, unitType, unitId)
+			? Number(existing.executorRefusalEscalations) || 0
+			: 0,
 	};
 	writeState(basePath, state);
 	return state;
 }

+/**
+ * Maximum number of executor-refusal-driven re-dispatches for a single unit
+ * before the loop falls back to the legacy blocked-and-pause behavior. The
+ * refusal branch re-dispatches while the counter is <= this value, so with 3
+ * the first three refusals are re-dispatched and the fourth pauses the unit;
+ * that bounds the loop even when every fallback model also refuses.
+ */
+export const MAX_EXECUTOR_REFUSAL_ESCALATIONS = 3;
+
+/**
+ * Increment the per-unit executor-refusal escalation counter and return the
+ * new count (1-based). Purpose: bound the refusal handler's block-and-re-dispatch
+ * loop so a fallback chain of refusing models cannot loop forever; once the
+ * budget is exhausted the unit pauses via the legacy blocked-checkpoint path.
+ *
+ * Returns 0 and writes nothing when sameUnit() rejects the persisted state for
+ * (unitType, unitId) — nothing was counted, so 0 is not "budget remaining".
+ * NOTE(review): the runUnitPhase refusal branch checks `result <= MAX`, which a
+ * 0 return also satisfies, so an untracked unit re-dispatches without spending
+ * budget — confirm that is intended.
+ * Consumer: runUnitPhase refusal branch in auto/phases-unit.js.
+ */
+export function recordExecutorRefusalEscalation(basePath, unitType, unitId) {
+	const state = readJson(statePath(basePath));
+	if (!sameUnit(state, unitType, unitId)) return 0;
+	const next = (Number(state.executorRefusalEscalations) || 0) + 1;
+	writeState(basePath, {
+		...state,
+		executorRefusalEscalations: next,
+		updatedAt: nowIso(),
+	});
+	return next;
+}
+
 /**
 * Build the PDD autonomous solver prompt block appended to unit prompts.
 *
--- a/src/resources/extensions/sf/tests/autonomous-solver.test.mjs
+++ b/src/resources/extensions/sf/tests/autonomous-solver.test.mjs
@ -18,9 +18,11 @@ import {
 	getConfiguredAutonomousSolverMaxIterations,
 	getSolverPhase,
 	isNoOpExecutorTranscript,
+	MAX_EXECUTOR_REFUSAL_ESCALATIONS,
 	readAutonomousSolverState,
 	readLatestAutonomousSolverCheckpoint,
 	recordAutonomousSolverMissingCheckpointRetry,
+	recordExecutorRefusalEscalation,
 } from "../autonomous-solver.js";

 let tempDirs = [];
@ -84,6 +86,50 @@ describe("autonomous solver", () => {
 		expect(next.iteration).toBe(1);
 	});

+	test("recordExecutorRefusalEscalation_increments_per_unit_and_resets_on_new_unit", () => {
+		const project = makeProject();
+		beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
+
+		expect(
+			recordExecutorRefusalEscalation(project, "execute-task", "M001/S01/T01"),
+		).toBe(1);
+		expect(
+			recordExecutorRefusalEscalation(project, "execute-task", "M001/S01/T01"),
+		).toBe(2);
+
+		const stateAfterT01 = readAutonomousSolverState(project);
+		expect(stateAfterT01.executorRefusalEscalations).toBe(2);
+
+		// Starting another iteration of the same unit preserves the counter
+		// (the refusal budget is per-unit, not per-iteration).
+		beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
+		expect(readAutonomousSolverState(project).executorRefusalEscalations).toBe(
+			2,
+		);
+
+		// Switching to a new unit resets the counter — a fresh unit gets a
+		// fresh budget.
+		beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T02");
+		expect(readAutonomousSolverState(project).executorRefusalEscalations).toBe(
+			0,
+		);
+
+		// State now tracks T02, so recording against the stale T01 id returns 0 and leaves the counter at 0.
+		expect(
+			recordExecutorRefusalEscalation(project, "execute-task", "M001/S01/T01"),
+		).toBe(0);
+		expect(readAutonomousSolverState(project).executorRefusalEscalations).toBe(
+			0,
+		);
+	});
+
+	test("MAX_EXECUTOR_REFUSAL_ESCALATIONS_is_a_positive_integer", () => {
+		// Budget must be > 0: the refusal handler re-dispatches only while the
+		// 1-based counter is <= the budget. It must also be an integer (used in comparisons).
+		expect(Number.isInteger(MAX_EXECUTOR_REFUSAL_ESCALATIONS)).toBe(true);
+		expect(MAX_EXECUTOR_REFUSAL_ESCALATIONS).toBeGreaterThan(0);
+	});
+
 	test("appendAutonomousSolverCheckpoint_writes_pdd_projection_and_history", () => {
 		const project = makeProject();
 		beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");