feat(selection): thread unitType + failure_mode into fallback outcome records

- FallbackResolver.setUnitContext() stores {unitType,unitId} from autonomous dispatch
- run-unit.js calls pi.setFallbackUnitContext() before/after each unit
- _findAnyAvailableFallback uses real unitType/unitId from context, not sentinel
- Schema v59: failure_mode column in llm_task_outcomes
- insertLlmTaskOutcome accepts failure_mode (rate_limit, quota_exhausted, auth_error)
- register-hooks.js passes event.classification.reason as failure_mode
- register-hooks.js uses real event.unitId when available
- ExtensionRuntimeActions.setFallbackUnitContext added to pi API surface

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-10 23:14:22 +02:00
parent 009651e86f
commit e50321b62b
7 changed files with 89 additions and 4 deletions

View file

@ -464,6 +464,21 @@ export class AgentSession {
return this._fallbackResolver;
}
/**
* Set the active unit context on the fallback resolver.
*
* Purpose: autonomous dispatch calls this before running each unit so that
* model failure outcomes are attributed to the correct unit type (execute-task,
* complete-slice, etc.) for phase-accurate Bayesian weighting.
*
* Consumer: SF extension pi.setFallbackUnitContext() from the autonomous loop.
*
* @param ctx - Unit type and unit id to forward to the fallback resolver, or
* null to clear the context.
*/
setFallbackUnitContext(
ctx: { unitType: string; unitId: string } | null,
): void {
// Pure delegation: the resolver owns the stored context.
this._fallbackResolver.setUnitContext(ctx);
}
// =========================================================================
// Event Subscription
// =========================================================================
@ -2391,6 +2406,7 @@ export class AgentSession {
},
getThinkingLevel: () => this.thinkingLevel,
setThinkingLevel: (level) => this.setThinkingLevel(level),
setFallbackUnitContext: (ctx) => this.setFallbackUnitContext(ctx),
},
{
getModel: () => this.model,

View file

@ -492,6 +492,7 @@ export function createExtensionRuntime(): ExtensionRuntime {
Promise.reject(new Error("Extension runtime not initialized")),
getThinkingLevel: notInitialized,
setThinkingLevel: notInitialized,
setFallbackUnitContext: () => {},
flagValues: new Map(),
pendingProviderRegistrations: [],
// Pre-bind: queue registrations so bindCore() can flush them once the
@ -683,6 +684,10 @@ function createExtensionAPI(
runtime.setThinkingLevel(level);
},
setFallbackUnitContext(ctx) {
runtime.setFallbackUnitContext(ctx);
},
registerProvider(name: string, config: ProviderConfig) {
runtime.registerProvider(name, config);
},

View file

@ -1767,6 +1767,15 @@ export interface ExtensionActions {
) => Promise<boolean>;
getThinkingLevel: () => ThinkingLevel;
setThinkingLevel: (level: ThinkingLevel) => void;
/**
* Set the active unit context on the fallback resolver.
* Call before dispatching each autonomous unit so model failure outcomes
* are attributed to the correct unit type in the learning store.
* Pass null to clear context after the unit completes.
*/
setFallbackUnitContext: (
ctx: { unitType: string; unitId: string } | null,
) => void;
}
/**

View file

@ -31,6 +31,9 @@ type EmitBeforeModelSelect = (
) => Promise<BeforeModelSelectResult | undefined>;
export class FallbackResolver {
/** Current unit context set by autonomous dispatch before each unit runs. */
private _unitContext: { unitType: string; unitId: string } | null = null;
constructor(
private settingsManager: SettingsManager,
private authStorage: AuthStorage,
@ -40,6 +43,21 @@ export class FallbackResolver {
private emitBeforeModelSelect?: EmitBeforeModelSelect,
) {}
/**
* Set the active unit context so fallback outcome records use the correct
* unit_type/unit_id rather than a generic sentinel.
*
* Purpose: autonomous dispatch calls this before running each unit so that
* any mid-unit model failure is attributed to the right unit type (execute-task,
* complete-slice, plan-milestone, etc.) for accurate Bayesian weighting.
*
* Consumer: agent-session.ts setFallbackUnitContext, called from SF extension
* autonomous loop via pi.setFallbackUnitContext().
*
* @param ctx - Unit type and unit id to remember, or null to clear. While no
* context is stored, the fallback selection path defaults to "execute-task"
* and a `fallback:<provider>/<id>` sentinel unit id.
*/
setUnitContext(ctx: { unitType: string; unitId: string } | null): void {
// Overwrites any previously stored context; the value is kept by reference.
this._unitContext = ctx;
}
/**
* Find a fresh replacement for a model that just failed.
* Ignores fallback chains and reselects from the current available registry.
@ -168,9 +186,11 @@ export class FallbackResolver {
// 2. Return the best outcome-weighted replacement
if (this.emitBeforeModelSelect) {
try {
const unitType = this._unitContext?.unitType ?? "execute-task";
const unitId = this._unitContext?.unitId ?? `fallback:${currentModel.provider}/${currentModel.id}`;
const result = await this.emitBeforeModelSelect({
unitType: "execute-task",
unitId: "",
unitType,
unitId,
classification: {
tier: "standard",
reason: errorType ?? "unknown",

View file

@ -176,6 +176,11 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
};
}
}
// Tell FallbackResolver what unit is running so any mid-unit model failure
// is attributed to the correct unit type in the learning store.
if (typeof pi.setFallbackUnitContext === "function") {
pi.setFallbackUnitContext({ unitType, unitId });
}
// ── Create the agent_end promise (per-unit one-shot) ──
// When keepSession=false: clear the in-flight guard now that the new session
// is fully ready, so handleAgentEnd processes events for this unit only.
@ -285,6 +290,10 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
Promise.race([unitPromise, timeoutResult]),
);
if (unitTimeoutHandle) clearTimeout(unitTimeoutHandle);
// Clear unit context — fallback after this point has no associated unit.
if (typeof pi.setFallbackUnitContext === "function") {
pi.setFallbackUnitContext(null);
}
debugLog("runUnit", {
phase: "agent-end-received",
unitType,

View file

@ -1540,8 +1540,11 @@ export function registerHooks(pi, ecosystemHandlers = []) {
recordLearnedOutcome({
model_id: event.currentModelId,
unit_type: event.unitType ?? "execute-task",
unit_id: `fallback:${event.currentModelId}`,
// Use the real unit_id when the autonomous dispatch set it; fall back to a
// sentinel so the NOT NULL constraint is always satisfied.
unit_id: event.unitId || `fallback:${event.currentModelId}`,
succeeded: false,
failure_mode: event.classification?.reason ?? null,
recorded_at: Date.now(),
});
}

View file

@ -1309,6 +1309,7 @@ function initSchema(db, fileBacked) {
duration_ms INTEGER DEFAULT NULL,
tokens_total INTEGER DEFAULT NULL,
cost_usd REAL DEFAULT NULL,
failure_mode TEXT DEFAULT NULL,
recorded_at INTEGER NOT NULL
)
`);
@ -2365,6 +2366,7 @@ function migrateSchema(db) {
duration_ms INTEGER DEFAULT NULL,
tokens_total INTEGER DEFAULT NULL,
cost_usd REAL DEFAULT NULL,
failure_mode TEXT DEFAULT NULL,
recorded_at INTEGER NOT NULL
)
`);
@ -3210,6 +3212,23 @@ function migrateSchema(db) {
":applied_at": new Date().toISOString(),
});
}
if (currentVersion < 59) {
// Schema v59: add failure_mode to llm_task_outcomes so the learning system
// can differentiate transient failures (rate_limit) from hard failures
// (quota_exhausted, auth_error) when weighting model demotions.
ensureColumn(
db,
"llm_task_outcomes",
"failure_mode",
"ALTER TABLE llm_task_outcomes ADD COLUMN failure_mode TEXT DEFAULT NULL",
);
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 59,
":applied_at": new Date().toISOString(),
});
}
db.exec("COMMIT");
} catch (err) {
db.exec("ROLLBACK");
@ -6078,6 +6097,7 @@ export function insertLlmTaskOutcome(input) {
duration_ms,
tokens_total,
cost_usd,
failure_mode,
recorded_at
) VALUES (
:model_id,
@ -6092,6 +6112,7 @@ export function insertLlmTaskOutcome(input) {
:duration_ms,
:tokens_total,
:cost_usd,
:failure_mode,
:recorded_at
)
ON CONFLICT(unit_type, unit_id, recorded_at) DO UPDATE SET
@ -6104,7 +6125,8 @@ export function insertLlmTaskOutcome(input) {
blocker_discovered = excluded.blocker_discovered,
duration_ms = excluded.duration_ms,
tokens_total = excluded.tokens_total,
cost_usd = excluded.cost_usd`)
cost_usd = excluded.cost_usd,
failure_mode = excluded.failure_mode`)
.run({
":model_id": input.modelId,
":provider": input.provider,
@ -6118,6 +6140,7 @@ export function insertLlmTaskOutcome(input) {
":duration_ms": input.duration_ms ?? null,
":tokens_total": input.tokens_total ?? null,
":cost_usd": input.cost_usd ?? null,
":failure_mode": input.failure_mode ?? null,
":recorded_at": input.recorded_at ?? Date.now(),
});
return true;