feat(selection): thread unitType + failure_mode into fallback outcome records
- FallbackResolver.setUnitContext() stores {unitType,unitId} from autonomous dispatch
- run-unit.js calls pi.setFallbackUnitContext({ unitType, unitId }) before each unit and clears it with pi.setFallbackUnitContext(null) afterwards
- _findAnyAvailableFallback uses the real unitType/unitId from context instead of the hard-coded "execute-task"/"" sentinel (defaults apply only when no context is set)
- Schema v59: failure_mode column in llm_task_outcomes
- insertLlmTaskOutcome accepts failure_mode (rate_limit, quota_exhausted, auth_error)
- register-hooks.js passes event.classification.reason as failure_mode
- register-hooks.js uses the real event.unitId when available, otherwise the fallback:<modelId> sentinel
- ExtensionRuntimeActions.setFallbackUnitContext added to pi API surface
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
009651e86f
commit
e50321b62b
7 changed files with 89 additions and 4 deletions
|
|
@ -464,6 +464,21 @@ export class AgentSession {
|
|||
return this._fallbackResolver;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the active unit context on the fallback resolver.
|
||||
*
|
||||
* Purpose: autonomous dispatch calls this before running each unit so that
|
||||
* model failure outcomes are attributed to the correct unit type (execute-task,
|
||||
* complete-slice, …) for phase-accurate Bayesian weighting.
|
||||
*
|
||||
* Consumer: SF extension pi.setFallbackUnitContext() from the autonomous loop.
|
||||
*/
|
||||
setFallbackUnitContext(
|
||||
ctx: { unitType: string; unitId: string } | null,
|
||||
): void {
|
||||
this._fallbackResolver.setUnitContext(ctx);
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Event Subscription
|
||||
// =========================================================================
|
||||
|
|
@ -2391,6 +2406,7 @@ export class AgentSession {
|
|||
},
|
||||
getThinkingLevel: () => this.thinkingLevel,
|
||||
setThinkingLevel: (level) => this.setThinkingLevel(level),
|
||||
setFallbackUnitContext: (ctx) => this.setFallbackUnitContext(ctx),
|
||||
},
|
||||
{
|
||||
getModel: () => this.model,
|
||||
|
|
|
|||
|
|
@ -492,6 +492,7 @@ export function createExtensionRuntime(): ExtensionRuntime {
|
|||
Promise.reject(new Error("Extension runtime not initialized")),
|
||||
getThinkingLevel: notInitialized,
|
||||
setThinkingLevel: notInitialized,
|
||||
setFallbackUnitContext: () => {},
|
||||
flagValues: new Map(),
|
||||
pendingProviderRegistrations: [],
|
||||
// Pre-bind: queue registrations so bindCore() can flush them once the
|
||||
|
|
@ -683,6 +684,10 @@ function createExtensionAPI(
|
|||
runtime.setThinkingLevel(level);
|
||||
},
|
||||
|
||||
setFallbackUnitContext(ctx) {
|
||||
runtime.setFallbackUnitContext(ctx);
|
||||
},
|
||||
|
||||
registerProvider(name: string, config: ProviderConfig) {
|
||||
runtime.registerProvider(name, config);
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1767,6 +1767,15 @@ export interface ExtensionActions {
|
|||
) => Promise<boolean>;
|
||||
getThinkingLevel: () => ThinkingLevel;
|
||||
setThinkingLevel: (level: ThinkingLevel) => void;
|
||||
/**
|
||||
* Set the active unit context on the fallback resolver.
|
||||
* Call before dispatching each autonomous unit so model failure outcomes
|
||||
* are attributed to the correct unit type in the learning store.
|
||||
* Pass null to clear context after the unit completes.
|
||||
*/
|
||||
setFallbackUnitContext: (
|
||||
ctx: { unitType: string; unitId: string } | null,
|
||||
) => void;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -31,6 +31,9 @@ type EmitBeforeModelSelect = (
|
|||
) => Promise<BeforeModelSelectResult | undefined>;
|
||||
|
||||
export class FallbackResolver {
|
||||
/** Current unit context set by autonomous dispatch before each unit runs. */
|
||||
private _unitContext: { unitType: string; unitId: string } | null = null;
|
||||
|
||||
constructor(
|
||||
private settingsManager: SettingsManager,
|
||||
private authStorage: AuthStorage,
|
||||
|
|
@ -40,6 +43,21 @@ export class FallbackResolver {
|
|||
private emitBeforeModelSelect?: EmitBeforeModelSelect,
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Set the active unit context so fallback outcome records use the correct
|
||||
* unit_type/unit_id rather than a generic sentinel.
|
||||
*
|
||||
* Purpose: autonomous dispatch calls this before running each unit so that
|
||||
* any mid-unit model failure is attributed to the right unit type (execute-task,
|
||||
* complete-slice, plan-milestone, …) for accurate Bayesian weighting.
|
||||
*
|
||||
* Consumer: agent-session.ts setFallbackUnitContext, called from SF extension
|
||||
* autonomous loop via pi.setFallbackUnitContext().
|
||||
*/
|
||||
setUnitContext(ctx: { unitType: string; unitId: string } | null): void {
|
||||
this._unitContext = ctx;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find a fresh replacement for a model that just failed.
|
||||
* Ignores fallback chains and reselects from the current available registry.
|
||||
|
|
@ -168,9 +186,11 @@ export class FallbackResolver {
|
|||
// 2. Return the best outcome-weighted replacement
|
||||
if (this.emitBeforeModelSelect) {
|
||||
try {
|
||||
const unitType = this._unitContext?.unitType ?? "execute-task";
|
||||
const unitId = this._unitContext?.unitId ?? `fallback:${currentModel.provider}/${currentModel.id}`;
|
||||
const result = await this.emitBeforeModelSelect({
|
||||
unitType: "execute-task",
|
||||
unitId: "",
|
||||
unitType,
|
||||
unitId,
|
||||
classification: {
|
||||
tier: "standard",
|
||||
reason: errorType ?? "unknown",
|
||||
|
|
|
|||
|
|
@ -176,6 +176,11 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
|
|||
};
|
||||
}
|
||||
}
|
||||
// Tell FallbackResolver what unit is running so any mid-unit model failure
|
||||
// is attributed to the correct unit type in the learning store.
|
||||
if (typeof pi.setFallbackUnitContext === "function") {
|
||||
pi.setFallbackUnitContext({ unitType, unitId });
|
||||
}
|
||||
// ── Create the agent_end promise (per-unit one-shot) ──
|
||||
// When keepSession=false: clear the in-flight guard now that the new session
|
||||
// is fully ready, so handleAgentEnd processes events for this unit only.
|
||||
|
|
@ -285,6 +290,10 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
|
|||
Promise.race([unitPromise, timeoutResult]),
|
||||
);
|
||||
if (unitTimeoutHandle) clearTimeout(unitTimeoutHandle);
|
||||
// Clear unit context — fallback after this point has no associated unit.
|
||||
if (typeof pi.setFallbackUnitContext === "function") {
|
||||
pi.setFallbackUnitContext(null);
|
||||
}
|
||||
debugLog("runUnit", {
|
||||
phase: "agent-end-received",
|
||||
unitType,
|
||||
|
|
|
|||
|
|
@ -1540,8 +1540,11 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
recordLearnedOutcome({
|
||||
model_id: event.currentModelId,
|
||||
unit_type: event.unitType ?? "execute-task",
|
||||
unit_id: `fallback:${event.currentModelId}`,
|
||||
// Use the real unit_id when the autonomous dispatch set it; fall back to a
|
||||
// sentinel so the NOT NULL constraint is always satisfied.
|
||||
unit_id: event.unitId || `fallback:${event.currentModelId}`,
|
||||
succeeded: false,
|
||||
failure_mode: event.classification?.reason ?? null,
|
||||
recorded_at: Date.now(),
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1309,6 +1309,7 @@ function initSchema(db, fileBacked) {
|
|||
duration_ms INTEGER DEFAULT NULL,
|
||||
tokens_total INTEGER DEFAULT NULL,
|
||||
cost_usd REAL DEFAULT NULL,
|
||||
failure_mode TEXT DEFAULT NULL,
|
||||
recorded_at INTEGER NOT NULL
|
||||
)
|
||||
`);
|
||||
|
|
@ -2365,6 +2366,7 @@ function migrateSchema(db) {
|
|||
duration_ms INTEGER DEFAULT NULL,
|
||||
tokens_total INTEGER DEFAULT NULL,
|
||||
cost_usd REAL DEFAULT NULL,
|
||||
failure_mode TEXT DEFAULT NULL,
|
||||
recorded_at INTEGER NOT NULL
|
||||
)
|
||||
`);
|
||||
|
|
@ -3210,6 +3212,23 @@ function migrateSchema(db) {
|
|||
":applied_at": new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
if (currentVersion < 59) {
|
||||
// Schema v59: add failure_mode to llm_task_outcomes so the learning system
|
||||
// can differentiate transient failures (rate_limit) from hard failures
|
||||
// (quota_exhausted, auth_error) when weighting model demotions.
|
||||
ensureColumn(
|
||||
db,
|
||||
"llm_task_outcomes",
|
||||
"failure_mode",
|
||||
"ALTER TABLE llm_task_outcomes ADD COLUMN failure_mode TEXT DEFAULT NULL",
|
||||
);
|
||||
db.prepare(
|
||||
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
|
||||
).run({
|
||||
":version": 59,
|
||||
":applied_at": new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
db.exec("COMMIT");
|
||||
} catch (err) {
|
||||
db.exec("ROLLBACK");
|
||||
|
|
@ -6078,6 +6097,7 @@ export function insertLlmTaskOutcome(input) {
|
|||
duration_ms,
|
||||
tokens_total,
|
||||
cost_usd,
|
||||
failure_mode,
|
||||
recorded_at
|
||||
) VALUES (
|
||||
:model_id,
|
||||
|
|
@ -6092,6 +6112,7 @@ export function insertLlmTaskOutcome(input) {
|
|||
:duration_ms,
|
||||
:tokens_total,
|
||||
:cost_usd,
|
||||
:failure_mode,
|
||||
:recorded_at
|
||||
)
|
||||
ON CONFLICT(unit_type, unit_id, recorded_at) DO UPDATE SET
|
||||
|
|
@ -6104,7 +6125,8 @@ export function insertLlmTaskOutcome(input) {
|
|||
blocker_discovered = excluded.blocker_discovered,
|
||||
duration_ms = excluded.duration_ms,
|
||||
tokens_total = excluded.tokens_total,
|
||||
cost_usd = excluded.cost_usd`)
|
||||
cost_usd = excluded.cost_usd,
|
||||
failure_mode = excluded.failure_mode`)
|
||||
.run({
|
||||
":model_id": input.modelId,
|
||||
":provider": input.provider,
|
||||
|
|
@ -6118,6 +6140,7 @@ export function insertLlmTaskOutcome(input) {
|
|||
":duration_ms": input.duration_ms ?? null,
|
||||
":tokens_total": input.tokens_total ?? null,
|
||||
":cost_usd": input.cost_usd ?? null,
|
||||
":failure_mode": input.failure_mode ?? null,
|
||||
":recorded_at": input.recorded_at ?? Date.now(),
|
||||
});
|
||||
return true;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue