diff --git a/packages/coding-agent/src/core/agent-session.ts b/packages/coding-agent/src/core/agent-session.ts index 7567d9f61..78099e793 100644 --- a/packages/coding-agent/src/core/agent-session.ts +++ b/packages/coding-agent/src/core/agent-session.ts @@ -464,6 +464,21 @@ export class AgentSession { return this._fallbackResolver; } + /** + * Set the active unit context on the fallback resolver. + * + * Purpose: autonomous dispatch calls this before running each unit so that + * model failure outcomes are attributed to the correct unit type (execute-task, + * complete-slice, …) for phase-accurate Bayesian weighting. + * + * Consumer: SF extension pi.setFallbackUnitContext() from the autonomous loop. + */ + setFallbackUnitContext( + ctx: { unitType: string; unitId: string } | null, + ): void { + this._fallbackResolver.setUnitContext(ctx); + } + // ========================================================================= // Event Subscription // ========================================================================= @@ -2391,6 +2406,7 @@ export class AgentSession { }, getThinkingLevel: () => this.thinkingLevel, setThinkingLevel: (level) => this.setThinkingLevel(level), + setFallbackUnitContext: (ctx) => this.setFallbackUnitContext(ctx), }, { getModel: () => this.model, diff --git a/packages/coding-agent/src/core/extensions/loader.ts b/packages/coding-agent/src/core/extensions/loader.ts index 3fa82476e..eb71819a9 100644 --- a/packages/coding-agent/src/core/extensions/loader.ts +++ b/packages/coding-agent/src/core/extensions/loader.ts @@ -492,6 +492,7 @@ export function createExtensionRuntime(): ExtensionRuntime { Promise.reject(new Error("Extension runtime not initialized")), getThinkingLevel: notInitialized, setThinkingLevel: notInitialized, + setFallbackUnitContext: () => {}, flagValues: new Map(), pendingProviderRegistrations: [], // Pre-bind: queue registrations so bindCore() can flush them once the @@ -683,6 +684,10 @@ function createExtensionAPI( runtime.setThinkingLevel(level); }, + setFallbackUnitContext(ctx) { + runtime.setFallbackUnitContext(ctx); + }, + registerProvider(name: string, config: ProviderConfig) { runtime.registerProvider(name, config); }, diff --git a/packages/coding-agent/src/core/extensions/types.ts b/packages/coding-agent/src/core/extensions/types.ts index 259cc6134..03e620631 100644 --- a/packages/coding-agent/src/core/extensions/types.ts +++ b/packages/coding-agent/src/core/extensions/types.ts @@ -1767,6 +1767,15 @@ export interface ExtensionActions { ) => Promise; getThinkingLevel: () => ThinkingLevel; setThinkingLevel: (level: ThinkingLevel) => void; + /** + * Set the active unit context on the fallback resolver. + * Call before dispatching each autonomous unit so model failure outcomes + * are attributed to the correct unit type in the learning store. + * Pass null to clear context after the unit completes. + */ + setFallbackUnitContext: ( + ctx: { unitType: string; unitId: string } | null, + ) => void; } /** diff --git a/packages/coding-agent/src/core/fallback-resolver.ts b/packages/coding-agent/src/core/fallback-resolver.ts index f7c1994bd..abaa64dbf 100644 --- a/packages/coding-agent/src/core/fallback-resolver.ts +++ b/packages/coding-agent/src/core/fallback-resolver.ts @@ -31,6 +31,9 @@ type EmitBeforeModelSelect = ( ) => Promise; export class FallbackResolver { + /** Current unit context set by autonomous dispatch before each unit runs. */ + private _unitContext: { unitType: string; unitId: string } | null = null; + constructor( private settingsManager: SettingsManager, private authStorage: AuthStorage, @@ -40,6 +43,21 @@ export class FallbackResolver { private emitBeforeModelSelect?: EmitBeforeModelSelect, ) {} + /** + * Set the active unit context so fallback outcome records use the correct + * unit_type/unit_id rather than a generic sentinel. + * + * Purpose: autonomous dispatch calls this before running each unit so that + * any mid-unit model failure is attributed to the right unit type (execute-task, + * complete-slice, plan-milestone, …) for accurate Bayesian weighting. + * + * Consumer: agent-session.ts setFallbackUnitContext, called from SF extension + * autonomous loop via pi.setFallbackUnitContext(). + */ + setUnitContext(ctx: { unitType: string; unitId: string } | null): void { + this._unitContext = ctx; + } + /** * Find a fresh replacement for a model that just failed. * Ignores fallback chains and reselects from the current available registry. @@ -168,9 +186,11 @@ export class FallbackResolver { // 2. Return the best outcome-weighted replacement if (this.emitBeforeModelSelect) { try { + const unitType = this._unitContext?.unitType ?? "execute-task"; + const unitId = this._unitContext?.unitId ?? `fallback:${currentModel.provider}/${currentModel.id}`; const result = await this.emitBeforeModelSelect({ - unitType: "execute-task", - unitId: "", + unitType, + unitId, classification: { tier: "standard", reason: errorType ?? "unknown", diff --git a/src/resources/extensions/sf/auto/run-unit.js b/src/resources/extensions/sf/auto/run-unit.js index 87fd4aed7..b01da8878 100644 --- a/src/resources/extensions/sf/auto/run-unit.js +++ b/src/resources/extensions/sf/auto/run-unit.js @@ -176,6 +176,11 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) { }; } } + // Tell FallbackResolver what unit is running so any mid-unit model failure + // is attributed to the correct unit type in the learning store. + if (typeof pi.setFallbackUnitContext === "function") { + pi.setFallbackUnitContext({ unitType, unitId }); + } // ── Create the agent_end promise (per-unit one-shot) ── // When keepSession=false: clear the in-flight guard now that the new session // is fully ready, so handleAgentEnd processes events for this unit only. @@ -285,6 +290,10 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) { Promise.race([unitPromise, timeoutResult]), ); if (unitTimeoutHandle) clearTimeout(unitTimeoutHandle); + // Clear unit context — fallback after this point has no associated unit. + if (typeof pi.setFallbackUnitContext === "function") { + pi.setFallbackUnitContext(null); + } debugLog("runUnit", { phase: "agent-end-received", unitType, diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.js b/src/resources/extensions/sf/bootstrap/register-hooks.js index 1a2faf485..4f0affaf8 100644 --- a/src/resources/extensions/sf/bootstrap/register-hooks.js +++ b/src/resources/extensions/sf/bootstrap/register-hooks.js @@ -1540,8 +1540,11 @@ export function registerHooks(pi, ecosystemHandlers = []) { recordLearnedOutcome({ model_id: event.currentModelId, unit_type: event.unitType ?? "execute-task", - unit_id: `fallback:${event.currentModelId}`, + // Use the real unit_id when the autonomous dispatch set it; fall back to a + // sentinel so the NOT NULL constraint is always satisfied. + unit_id: event.unitId || `fallback:${event.currentModelId}`, succeeded: false, + failure_mode: event.classification?.reason ?? null, recorded_at: Date.now(), }); } diff --git a/src/resources/extensions/sf/sf-db.js b/src/resources/extensions/sf/sf-db.js index 8579753cb..446f15ae9 100644 --- a/src/resources/extensions/sf/sf-db.js +++ b/src/resources/extensions/sf/sf-db.js @@ -1309,6 +1309,7 @@ function initSchema(db, fileBacked) { duration_ms INTEGER DEFAULT NULL, tokens_total INTEGER DEFAULT NULL, cost_usd REAL DEFAULT NULL, + failure_mode TEXT DEFAULT NULL, recorded_at INTEGER NOT NULL ) `); @@ -2365,6 +2366,7 @@ function migrateSchema(db) { duration_ms INTEGER DEFAULT NULL, tokens_total INTEGER DEFAULT NULL, cost_usd REAL DEFAULT NULL, + failure_mode TEXT DEFAULT NULL, recorded_at INTEGER NOT NULL ) `); @@ -3210,6 +3212,23 @@ function migrateSchema(db) { ":applied_at": new Date().toISOString(), }); } + if (currentVersion < 59) { + // Schema v59: add failure_mode to llm_task_outcomes so the learning system + // can differentiate transient failures (rate_limit) from hard failures + // (quota_exhausted, auth_error) when weighting model demotions. + ensureColumn( + db, + "llm_task_outcomes", + "failure_mode", + "ALTER TABLE llm_task_outcomes ADD COLUMN failure_mode TEXT DEFAULT NULL", + ); + db.prepare( + "INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)", + ).run({ + ":version": 59, + ":applied_at": new Date().toISOString(), + }); + } db.exec("COMMIT"); } catch (err) { db.exec("ROLLBACK"); @@ -6078,6 +6097,7 @@ export function insertLlmTaskOutcome(input) { duration_ms, tokens_total, cost_usd, + failure_mode, recorded_at ) VALUES ( :model_id, @@ -6092,6 +6112,7 @@ export function insertLlmTaskOutcome(input) { :duration_ms, :tokens_total, :cost_usd, + :failure_mode, :recorded_at ) ON CONFLICT(unit_type, unit_id, recorded_at) DO UPDATE SET @@ -6104,7 +6125,8 @@ export function insertLlmTaskOutcome(input) { blocker_discovered = excluded.blocker_discovered, duration_ms = excluded.duration_ms, tokens_total = excluded.tokens_total, - cost_usd = excluded.cost_usd`) + cost_usd = excluded.cost_usd, + failure_mode = excluded.failure_mode`) .run({ ":model_id": input.modelId, ":provider": input.provider, @@ -6118,6 +6140,7 @@ export function insertLlmTaskOutcome(input) { ":duration_ms": input.duration_ms ?? null, ":tokens_total": input.tokens_total ?? null, ":cost_usd": input.cost_usd ?? null, + ":failure_mode": input.failure_mode ?? null, ":recorded_at": input.recorded_at ?? Date.now(), }); return true;