diff --git a/.sf/REQUIREMENTS.md b/.sf/REQUIREMENTS.md index a69339c24..9169de044 100644 --- a/.sf/REQUIREMENTS.md +++ b/.sf/REQUIREMENTS.md @@ -612,3 +612,14 @@ ADR-0000 declares SF a **purpose-to-software compiler**. R036–R040 codify that - **T-level**: every task plan opens with "Implements S's success-criterion via concrete artifact ." - **Code-level**: every SF-authored commit message footer carries `purposeAnchor: ` (cross-cuts R036) - **Doctor**: a new `purpose-chain-integrity` check walks the chain and refuses any link missing its anchor (cross-cuts R031 ADR enforcement) + +### R049 — Multi-Provider Parallel Routing +- Class: differentiator +- Status: active +- Description: Across-unit parallel dispatch (R046) routes different concurrent units to different LLM providers based on (a) provider per-minute quota, (b) model specialty (kimi-for-coding for code-heavy units, minimax for synthesis, gemini for long-context research), (c) cost optimization (cheaper models for low-stakes units), (d) failover (avoid the provider that just rate-limited). The scheduler reads model-router config + live quota state and picks per-unit. Different models work simultaneously on different slices, spreading load across providers. +- Why it matters: Single-provider parallel maxes out at that provider's per-minute quota; multi-provider parallel scales N× as we add providers. Combined with R046, this is what makes the 2-4 week horizon collapse to days for embarrassingly parallel work (e.g. plan-slice across 20 independent slices). +- Source: spec +- Primary owning slice: unmapped (future "M036 Multi-Provider Parallel Routing") +- Supporting slices: none +- Validation: unmapped +- Notes: Builds on existing model-router.js scoring + R017's tool-failure demotion + R046's autonomous parallel dispatch. The new piece is the scheduler-level multi-model assignment per dispatch slot. 
/**
 * #M010/S03: Try inline-scope dispatch via DispatchLayer.
 *
 * Outcomes:
 *  - `null` when the unit type is not inline-eligible, or when the
 *    DispatchLayer cannot be constructed. Nothing has been dispatched in
 *    either case, so the caller can safely fall through to swarm/legacy.
 *  - `{status: "completed", event, _via: "inline"}` when the dispatch
 *    reports `ok`; the dispatch output is wrapped as one assistant text
 *    message.
 *  - `{status: "cancelled", errorContext, _via: "inline"}` for every failure
 *    after construction: a non-ok result, a missing result, or a rejected
 *    dispatch promise.
 *
 * This function does not read SF_INLINE_DISPATCH itself; the env opt-in is
 * applied by runUnit before it calls here.
 *
 * NOTE(review): `_prompt` is accepted but never forwarded to the dispatch.
 * Presumably the inline runner derives its prompt from unitType/unitId —
 * confirm against DispatchLayer/runUnitInline.
 *
 * @param {object} ctx - Only `ctx.basePath` is read, as a fallback base path.
 * @param {object} s - Only `s.basePath` is read, as the preferred base path.
 * @param {string} unitType - Unit type; checked with isInlineEligible.
 * @param {string} unitId - Unit identifier, forwarded to the dispatch.
 * @param {string} _prompt - Unused (see note above).
 * @param {object} [options] - Optional `model`, `timeoutMs`,
 *   `noOutputTimeoutMs`, `signal` and `extras`; each is forwarded only when
 *   truthy.
 * @returns {Promise<object|null>} UnitResult-shaped object, or null to fall
 *   through.
 */
async function tryInlineDispatch(ctx, s, unitType, unitId, _prompt, options) {
  if (!isInlineEligible(unitType)) return null;

  const FALLBACK_MESSAGE = "inline dispatch failed";
  const describeError = (err) =>
    err instanceof Error ? err.message : String(err);
  const cancelled = (message) => ({
    status: "cancelled",
    errorContext: {
      message,
      category: "inline-failure",
      isTransient: false,
    },
    _via: "inline",
  });

  const basePath = s.basePath ?? ctx.basePath ?? process.cwd();
  debugLog("runUnit", {
    phase: "inline-route",
    unitType,
    unitId,
    basePath,
  });

  let layer;
  try {
    layer = new DispatchLayer(basePath);
  } catch (err) {
    debugLog("runUnit", {
      phase: "inline-route-construct-failed",
      unitType,
      unitId,
      error: describeError(err),
    });
    return null; // fall through to swarm
  }

  const dispatchOpts = {
    isolation: "full",
    coordination: "managed",
    scope: "inline",
    mode: "single",
    unitType,
    unitId,
    ...(options?.model ? { model: options.model } : {}),
    ...(options?.timeoutMs ? { timeoutMs: options.timeoutMs } : {}),
    ...(options?.noOutputTimeoutMs
      ? { noOutputTimeoutMs: options.noOutputTimeoutMs }
      : {}),
    ...(options?.signal ? { signal: options.signal } : {}),
    ...(options?.extras ? { extras: options.extras } : {}),
  };

  let result;
  try {
    result = await layer.dispatch(dispatchOpts);
  } catch (err) {
    // The dispatch may already have started work, so do NOT fall through to
    // swarm (that could run the unit twice). Report a structured
    // cancellation instead of letting the rejection escape.
    const message = describeError(err);
    debugLog("runUnit", {
      phase: "inline-route-threw",
      unitType,
      unitId,
      error: message,
    });
    return cancelled(message || FALLBACK_MESSAGE);
  }

  if (result?.ok) {
    const text = result.output ?? "";
    debugLog("runUnit", {
      phase: "inline-route-completed",
      unitType,
      unitId,
      outputLength: text.length,
    });
    return {
      status: "completed",
      event: {
        messages: [
          {
            role: "assistant",
            content: [{ type: "text", text }],
          },
        ],
      },
      _via: "inline",
    };
  }

  // Inline path returned a structured failure. Surface as cancelled UnitResult.
  debugLog("runUnit", {
    phase: "inline-route-failed",
    unitType,
    unitId,
    exitCode: result?.exitCode,
    stderr: result?.stderr,
  });
  const stderr = result?.stderr;
  // An absent, empty or whitespace-only stderr would give a blank error
  // message downstream, so substitute the generic fallback.
  const hasUsableStderr =
    stderr != null && !(typeof stderr === "string" && stderr.trim() === "");
  return cancelled(hasUsableStderr ? stderr : FALLBACK_MESSAGE);
}
if (shouldRouteRunUnitViaSwarm(options)) { return runUnitViaSwarm(ctx, pi, s, unitType, unitId, prompt, options);