feat(selection): thread unitType + failure_mode into fallback outcome records

- FallbackResolver.setUnitContext() stores {unitType, unitId} from autonomous dispatch
- run-unit.js calls pi.setFallbackUnitContext() before each unit and clears it (null) after
- _findAnyAvailableFallback uses the real unitType/unitId from context when set, falling back to the sentinel otherwise
- Schema v59: failure_mode column in llm_task_outcomes
- insertLlmTaskOutcome accepts failure_mode (rate_limit, quota_exhausted, auth_error)
- register-hooks.js passes event.classification.reason as failure_mode
- register-hooks.js uses the real event.unitId when available
- ExtensionActions.setFallbackUnitContext added to the pi API surface

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-10 23:14:22 +02:00 · 2026-05-10 23:14:22 +02:00 · e50321b62b
commit e50321b62b
parent 009651e86f
7 changed files with 89 additions and 4 deletions
--- a/packages/coding-agent/src/core/agent-session.ts
+++ b/packages/coding-agent/src/core/agent-session.ts
@ -464,6 +464,21 @@ export class AgentSession {
 		return this._fallbackResolver;
 	}

+	/**
+	 * Set the active unit context on the fallback resolver.
+	 *
+	 * Purpose: autonomous dispatch calls this before running each unit so that
+	 * model failure outcomes are attributed to the correct unit type (execute-task,
+	 * complete-slice, …) for phase-accurate Bayesian weighting.
+	 *
+	 * Consumer: SF extension pi.setFallbackUnitContext() from the autonomous loop.
+	 */
+	setFallbackUnitContext(
+		ctx: { unitType: string; unitId: string } | null,
+	): void {
+		this._fallbackResolver.setUnitContext(ctx);
+	}
+
 	// =========================================================================
 	// Event Subscription
 	// =========================================================================
@ -2391,6 +2406,7 @@ export class AgentSession {
 				},
 				getThinkingLevel: () => this.thinkingLevel,
 				setThinkingLevel: (level) => this.setThinkingLevel(level),
+				setFallbackUnitContext: (ctx) => this.setFallbackUnitContext(ctx),
 			},
 			{
 				getModel: () => this.model,
--- a/packages/coding-agent/src/core/extensions/loader.ts
+++ b/packages/coding-agent/src/core/extensions/loader.ts
@ -492,6 +492,7 @@ export function createExtensionRuntime(): ExtensionRuntime {
 			Promise.reject(new Error("Extension runtime not initialized")),
 		getThinkingLevel: notInitialized,
 		setThinkingLevel: notInitialized,
+		setFallbackUnitContext: () => {},
 		flagValues: new Map(),
 		pendingProviderRegistrations: [],
 		// Pre-bind: queue registrations so bindCore() can flush them once the
@ -683,6 +684,10 @@ function createExtensionAPI(
 			runtime.setThinkingLevel(level);
 		},

+		setFallbackUnitContext(ctx) {
+			runtime.setFallbackUnitContext(ctx);
+		},
+
 		registerProvider(name: string, config: ProviderConfig) {
 			runtime.registerProvider(name, config);
 		},
--- a/packages/coding-agent/src/core/extensions/types.ts
+++ b/packages/coding-agent/src/core/extensions/types.ts
@ -1767,6 +1767,15 @@ export interface ExtensionActions {
 	) => Promise<boolean>;
 	getThinkingLevel: () => ThinkingLevel;
 	setThinkingLevel: (level: ThinkingLevel) => void;
+	/**
+	 * Set the active unit context on the fallback resolver.
+	 * Call before dispatching each autonomous unit so model failure outcomes
+	 * are attributed to the correct unit type in the learning store.
+	 * Pass null to clear context after the unit completes.
+	 */
+	setFallbackUnitContext: (
+		ctx: { unitType: string; unitId: string } | null,
+	) => void;
 }

 /**
--- a/packages/coding-agent/src/core/fallback-resolver.ts
+++ b/packages/coding-agent/src/core/fallback-resolver.ts
@ -31,6 +31,9 @@ type EmitBeforeModelSelect = (
 ) => Promise<BeforeModelSelectResult | undefined>;

 export class FallbackResolver {
+	/** Current unit context set by autonomous dispatch before each unit runs. */
+	private _unitContext: { unitType: string; unitId: string } | null = null;
+
 	constructor(
 		private settingsManager: SettingsManager,
 		private authStorage: AuthStorage,
@ -40,6 +43,21 @@ export class FallbackResolver {
 		private emitBeforeModelSelect?: EmitBeforeModelSelect,
 	) {}

+	/**
+	 * Set the active unit context so fallback outcome records use the correct
+	 * unit_type/unit_id rather than a generic sentinel.
+	 *
+	 * Purpose: autonomous dispatch calls this before running each unit so that
+	 * any mid-unit model failure is attributed to the right unit type (execute-task,
+	 * complete-slice, plan-milestone, …) for accurate Bayesian weighting.
+	 *
+	 * Consumer: agent-session.ts setFallbackUnitContext, called from SF extension
+	 * autonomous loop via pi.setFallbackUnitContext().
+	 */
+	setUnitContext(ctx: { unitType: string; unitId: string } | null): void {
+		this._unitContext = ctx;
+	}
+
 	/**
 	 * Find a fresh replacement for a model that just failed.
 	 * Ignores fallback chains and reselects from the current available registry.
@ -168,9 +186,11 @@ export class FallbackResolver {
 		// 2. Return the best outcome-weighted replacement
 		if (this.emitBeforeModelSelect) {
 			try {
+				const unitType = this._unitContext?.unitType ?? "execute-task";
+				const unitId = this._unitContext?.unitId ?? `fallback:${currentModel.provider}/${currentModel.id}`;
 				const result = await this.emitBeforeModelSelect({
-					unitType: "execute-task",
-					unitId: "",
+					unitType,
+					unitId,
 					classification: {
 						tier: "standard",
 						reason: errorType ?? "unknown",
--- a/src/resources/extensions/sf/auto/run-unit.js
+++ b/src/resources/extensions/sf/auto/run-unit.js
@ -176,6 +176,11 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
 			};
 		}
 	}
+	// Tell FallbackResolver what unit is running so any mid-unit model failure
+	// is attributed to the correct unit type in the learning store.
+	if (typeof pi.setFallbackUnitContext === "function") {
+		pi.setFallbackUnitContext({ unitType, unitId });
+	}
 	// ── Create the agent_end promise (per-unit one-shot) ──
 	// When keepSession=false: clear the in-flight guard now that the new session
 	// is fully ready, so handleAgentEnd processes events for this unit only.
@ -285,6 +290,10 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
 		Promise.race([unitPromise, timeoutResult]),
 	);
 	if (unitTimeoutHandle) clearTimeout(unitTimeoutHandle);
+	// Clear unit context — fallback after this point has no associated unit.
+	if (typeof pi.setFallbackUnitContext === "function") {
+		pi.setFallbackUnitContext(null);
+	}
 	debugLog("runUnit", {
 		phase: "agent-end-received",
 		unitType,
--- a/src/resources/extensions/sf/bootstrap/register-hooks.js
+++ b/src/resources/extensions/sf/bootstrap/register-hooks.js
@ -1540,8 +1540,11 @@ export function registerHooks(pi, ecosystemHandlers = []) {
 			recordLearnedOutcome({
 				model_id: event.currentModelId,
 				unit_type: event.unitType ?? "execute-task",
-				unit_id: `fallback:${event.currentModelId}`,
+				// Use the real unit_id when the autonomous dispatch set it; fall back to a
+				// sentinel so the NOT NULL constraint is always satisfied.
+				unit_id: event.unitId || `fallback:${event.currentModelId}`,
 				succeeded: false,
+				failure_mode: event.classification?.reason ?? null,
 				recorded_at: Date.now(),
 			});
 		}
--- a/src/resources/extensions/sf/sf-db.js
+++ b/src/resources/extensions/sf/sf-db.js
@ -1309,6 +1309,7 @@ function initSchema(db, fileBacked) {
        duration_ms INTEGER DEFAULT NULL,
        tokens_total INTEGER DEFAULT NULL,
        cost_usd REAL DEFAULT NULL,
+        failure_mode TEXT DEFAULT NULL,
        recorded_at INTEGER NOT NULL
      )
    `);
@ -2365,6 +2366,7 @@ function migrateSchema(db) {
          duration_ms INTEGER DEFAULT NULL,
          tokens_total INTEGER DEFAULT NULL,
          cost_usd REAL DEFAULT NULL,
+          failure_mode TEXT DEFAULT NULL,
          recorded_at INTEGER NOT NULL
        )
      `);
@ -3210,6 +3212,23 @@ function migrateSchema(db) {
 				":applied_at": new Date().toISOString(),
 			});
 		}
+		if (currentVersion < 59) {
+			// Schema v59: add failure_mode to llm_task_outcomes so the learning system
+			// can differentiate transient failures (rate_limit) from hard failures
+			// (quota_exhausted, auth_error) when weighting model demotions.
+			ensureColumn(
+				db,
+				"llm_task_outcomes",
+				"failure_mode",
+				"ALTER TABLE llm_task_outcomes ADD COLUMN failure_mode TEXT DEFAULT NULL",
+			);
+			db.prepare(
+				"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
+			).run({
+				":version": 59,
+				":applied_at": new Date().toISOString(),
+			});
+		}
 		db.exec("COMMIT");
 	} catch (err) {
 		db.exec("ROLLBACK");
@ -6078,6 +6097,7 @@ export function insertLlmTaskOutcome(input) {
         duration_ms,
         tokens_total,
         cost_usd,
+         failure_mode,
         recorded_at
       ) VALUES (
         :model_id,
@ -6092,6 +6112,7 @@ export function insertLlmTaskOutcome(input) {
         :duration_ms,
         :tokens_total,
         :cost_usd,
+         :failure_mode,
         :recorded_at
       )
       ON CONFLICT(unit_type, unit_id, recorded_at) DO UPDATE SET
@ -6104,7 +6125,8 @@ export function insertLlmTaskOutcome(input) {
         blocker_discovered = excluded.blocker_discovered,
         duration_ms = excluded.duration_ms,
         tokens_total = excluded.tokens_total,
-         cost_usd = excluded.cost_usd`)
+         cost_usd = excluded.cost_usd,
+         failure_mode = excluded.failure_mode`)
 			.run({
 				":model_id": input.modelId,
 				":provider": input.provider,
@ -6118,6 +6140,7 @@ export function insertLlmTaskOutcome(input) {
 				":duration_ms": input.duration_ms ?? null,
 				":tokens_total": input.tokens_total ?? null,
 				":cost_usd": input.cost_usd ?? null,
+				":failure_mode": input.failure_mode ?? null,
 				":recorded_at": input.recorded_at ?? Date.now(),
 			});
 		return true;