diff --git a/src/resources/extensions/gsd/auto-model-selection.ts b/src/resources/extensions/gsd/auto-model-selection.ts
index c369d23a7..c3311085b 100644
--- a/src/resources/extensions/gsd/auto-model-selection.ts
+++ b/src/resources/extensions/gsd/auto-model-selection.ts
@@ -15,6 +15,7 @@ import { resolveModelForComplexity, escalateTier, getEligibleModels, loadCapabil
 import { getLedger, getProjectTotals } from "./metrics.js";
 import { unitPhaseLabel } from "./auto-dashboard.js";
 import { getSessionModelOverride } from "./session-model-override.js";
+import { logWarning } from "./workflow-logger.js";
 
 export interface ModelSelectionResult {
   /** Routing metadata for metrics recording */
@@ -25,9 +26,7 @@ export interface ModelSelectionResult {
 
 export function resolvePreferredModelConfig(
   unitType: string,
-  autoModeStartModel: { provider: string; id: string } | null,
-  /** When false, only return explicit per-phase model configs — do not
-   * synthesize a routing ceiling from dynamic_routing.tier_models (#3962). */
+  autoModeStartModel: { provider: string; id: string; flatRateCtx?: FlatRateContext } | null,
   isAutoMode = true,
 ) {
   const explicitConfig = resolveModelWithFallbacksForUnit(unitType);
@@ -41,7 +40,7 @@ export function resolvePreferredModelConfig(
   if (!routingConfig.enabled || !routingConfig.tier_models) return undefined;
 
   // Don't synthesize a routing config for flat-rate providers (#3453).
-  if (autoModeStartModel && isFlatRateProvider(autoModeStartModel.provider)) return undefined;
+  if (autoModeStartModel && isFlatRateProvider(autoModeStartModel.provider, autoModeStartModel.flatRateCtx)) return undefined;
 
   const ceilingModel = routingConfig.tier_models.heavy
     ?? (autoModeStartModel ? `${autoModeStartModel.provider}/${autoModeStartModel.id}` : undefined);
@@ -68,7 +67,7 @@ export async function selectAndApplyModel(
   basePath: string,
   prefs: GSDPreferences | undefined,
   verbose: boolean,
-  autoModeStartModel: { provider: string; id: string } | null,
+  autoModeStartModel: { provider: string; id: string; flatRateCtx?: FlatRateContext } | null,
   retryContext?: { isRetry: boolean; previousTier?: string },
   /** When false (interactive/guided-flow), skip dynamic routing and use the session model.
    * Dynamic routing only applies in auto-mode where cost optimization is expected. (#3962) */
@@ -79,6 +78,17 @@ export async function selectAndApplyModel(
   const effectiveSessionModelOverride = sessionModelOverride === undefined
     ? getSessionModelOverride(ctx.sessionManager.getSessionId())
     : (sessionModelOverride ?? undefined);
+  // Enrich the start model with a flat-rate context up front so routing
+  // synthesis and the dispatch-time guard see the same signals (built-in
+  // list + user `flat_rate_providers` preference + externalCli auto-
+  // detection). The dispatch-time primary-model check below builds its
+  // own per-provider context when it has a resolved primary model.
+  if (autoModeStartModel) {
+    autoModeStartModel = {
+      ...autoModeStartModel,
+      flatRateCtx: buildFlatRateContext(autoModeStartModel.provider, ctx, prefs),
+    };
+  }
   const modelConfig = effectiveSessionModelOverride
     ? undefined
     : resolvePreferredModelConfig(unitType, autoModeStartModel, isAutoMode);
@@ -107,12 +117,16 @@ export async function selectAndApplyModel(
     if (routingConfig.enabled) {
       const primaryModel = resolveModelId(modelConfig.primary, availableModels, ctx.model?.provider);
       if (primaryModel) {
-        if (isFlatRateProvider(primaryModel.provider)) {
+        const primaryFlatRateCtx = buildFlatRateContext(primaryModel.provider, ctx, prefs);
+        if (isFlatRateProvider(primaryModel.provider, primaryFlatRateCtx)) {
           routingConfig.enabled = false;
         }
       } else if (
-        (autoModeStartModel && isFlatRateProvider(autoModeStartModel.provider))
-        || (ctx.model?.provider && isFlatRateProvider(ctx.model.provider))
+        (autoModeStartModel && isFlatRateProvider(autoModeStartModel.provider, autoModeStartModel.flatRateCtx))
+        || (ctx.model?.provider && isFlatRateProvider(
+          ctx.model.provider,
+          buildFlatRateContext(ctx.model.provider, ctx, prefs),
+        ))
       ) {
         // Primary model unresolvable but provider signals indicate flat-rate —
         // disable routing to prevent quality degradation.
@@ -416,8 +430,70 @@ export function resolveModelId(
  * Uses case-insensitive matching with alias support to prevent fail-open on
  * provider naming variations (e.g. "copilot" vs "github-copilot").
  */
-const FLAT_RATE_PROVIDERS = new Set(["github-copilot", "copilot", "claude-code"]);
+const BUILTIN_FLAT_RATE = new Set(["github-copilot", "copilot", "claude-code"]);
 
-export function isFlatRateProvider(provider: string): boolean {
-  return FLAT_RATE_PROVIDERS.has(provider.toLowerCase());
+/**
+ * Optional context that lets callers extend flat-rate detection beyond the
+ * hard-coded built-in list. Either signal on its own is enough to classify
+ * a provider as flat-rate.
+ */
+export interface FlatRateContext {
+  /**
+   * Auth mode for the specific provider being checked, as returned by
+   * `ctx.modelRegistry.getProviderAuthMode(provider)`. Any provider that
+   * wraps a local CLI (externalCli) is, by definition, a flat-rate
+   * subscription wrapper — every request costs the same regardless of
+   * model, so dynamic routing only degrades quality.
+   */
+  authMode?: "apiKey" | "oauth" | "externalCli" | "none";
+  /**
+   * Case-insensitive list of extra provider IDs the user has declared as
+   * flat-rate via `preferences.flat_rate_providers`. Used for private
+   * subscription-backed proxies and enterprise-gated deployments that the
+   * built-in list doesn't know about.
+   */
+  userFlatRate?: readonly string[];
+}
+
+export function isFlatRateProvider(provider: string, opts?: FlatRateContext): boolean {
+  const p = provider.toLowerCase();
+  if (BUILTIN_FLAT_RATE.has(p)) return true;
+  if (opts?.userFlatRate?.some(id => id.toLowerCase() === p)) return true;
+  if (opts?.authMode === "externalCli") return true;
+  return false;
+}
+
+/**
+ * Build a FlatRateContext for a given provider from live runtime state.
+ * Safe to call when ctx or prefs are undefined — missing pieces are
+ * treated as "no signal".
+ */
+export function buildFlatRateContext(
+  provider: string,
+  ctx?: { modelRegistry?: { getProviderAuthMode?: (p: string) => string } },
+  prefs?: { flat_rate_providers?: readonly string[] },
+): FlatRateContext {
+  let authMode: FlatRateContext["authMode"];
+  // Invoke getProviderAuthMode through the registry object: detaching it
+  // into a local would drop `this` for class-based registries.
+  const registry = ctx?.modelRegistry;
+  if (registry && typeof registry.getProviderAuthMode === "function") {
+    try {
+      const mode = registry.getProviderAuthMode(provider);
+      if (mode === "apiKey" || mode === "oauth" || mode === "externalCli" || mode === "none") {
+        authMode = mode;
+      }
+    } catch (err) {
+      // Registry lookup failure must never break flat-rate detection —
+      // fall through with authMode undefined and surface the cause.
+      logWarning(
+        "dispatch",
+        `flat-rate auth-mode lookup failed for ${provider}: ${err instanceof Error ? err.message : String(err)}`,
+      );
+    }
+  }
+  return {
+    authMode,
+    userFlatRate: prefs?.flat_rate_providers,
+  };
 }
diff --git a/src/resources/extensions/gsd/auto-start.ts b/src/resources/extensions/gsd/auto-start.ts
index 120a8e01b..0ce08ac19 100644
--- a/src/resources/extensions/gsd/auto-start.ts
+++ b/src/resources/extensions/gsd/auto-start.ts
@@ -825,12 +825,19 @@ export async function bootstrapAutoSession(
         ? `${s.autoModeStartModel.provider}/${s.autoModeStartModel.id}`
         : ctx.model ? `${ctx.model.provider}/${ctx.model.id}` : "default";
 
-      // Flat-rate providers (e.g. GitHub Copilot, claude-code) suppress routing
-      // at dispatch time (#3453) — reflect that in the banner.
-      const { isFlatRateProvider } = await import("./auto-model-selection.js");
+      // Flat-rate providers (e.g. GitHub Copilot, claude-code, user-declared
+      // subscription proxies, externalCli CLIs) suppress routing at dispatch
+      // time (#3453) — reflect that in the banner. Thread the same
+      // FlatRateContext used by selectAndApplyModel so user-declared
+      // flat-rate providers and externalCli auto-detection are respected.
+      const { isFlatRateProvider, buildFlatRateContext } = await import("./auto-model-selection.js");
+      const bannerPrefs = loadEffectiveGSDPreferences()?.preferences;
       const effectiveProvider = s.autoModeStartModel?.provider ?? ctx.model?.provider;
       const effectivelyEnabled = routingConfig.enabled
-        && !(effectiveProvider && isFlatRateProvider(effectiveProvider));
+        && !(effectiveProvider && isFlatRateProvider(
+          effectiveProvider,
+          buildFlatRateContext(effectiveProvider, ctx, bannerPrefs),
+        ));
 
       // The actual ceiling may come from tier_models.heavy, not the start model.
       const effectiveCeiling = (routingConfig.enabled && routingConfig.tier_models?.heavy)
diff --git a/src/resources/extensions/gsd/preferences-types.ts b/src/resources/extensions/gsd/preferences-types.ts
index 75aac4a0c..329faebd7 100644
--- a/src/resources/extensions/gsd/preferences-types.ts
+++ b/src/resources/extensions/gsd/preferences-types.ts
@@ -113,6 +113,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set([
   "discuss_preparation",
   "discuss_web_research",
   "discuss_depth",
+  "flat_rate_providers",
 ]);
 
 /** Canonical list of all dispatch unit types. */
@@ -359,6 +360,17 @@ export interface GSDPreferences {
    * Default: "standard".
    */
   discuss_depth?: "quick" | "standard" | "thorough";
+  /**
+   * Extra provider IDs to treat as flat-rate (no cost benefit from dynamic
+   * routing). Dynamic routing is suppressed for any provider listed here,
+   * in addition to the built-in list (github-copilot, copilot, claude-code)
+   * and any provider auto-detected via `authMode: "externalCli"`.
+   *
+   * Intended for private subscription-backed proxies, enterprise-gated
+   * deployments, and custom CLI wrappers where every request costs the
+   * same regardless of model. Case-insensitive.
+   */
+  flat_rate_providers?: string[];
 }
 
 export interface LoadedGSDPreferences {
diff --git a/src/resources/extensions/gsd/preferences-validation.ts b/src/resources/extensions/gsd/preferences-validation.ts
index e4ac3d3d6..c703abd1c 100644
--- a/src/resources/extensions/gsd/preferences-validation.ts
+++ b/src/resources/extensions/gsd/preferences-validation.ts
@@ -180,6 +180,29 @@ export function validatePreferences(preferences: GSDPreferences): {
     }
   }
 
+  // ─── Flat-rate Providers ────────────────────────────────────────────
+  // User-declared flat-rate providers for dynamic routing suppression.
+  // Built-in providers (github-copilot, copilot, claude-code) and any
+  // externalCli provider are already auto-detected; this list layers on
+  // top for private subscription proxies and custom CLI wrappers.
+  if (preferences.flat_rate_providers !== undefined) {
+    if (Array.isArray(preferences.flat_rate_providers)) {
+      const allStrings = preferences.flat_rate_providers.every(
+        (item: unknown) => typeof item === "string",
+      );
+      if (allStrings) {
+        // Strip empty/whitespace-only entries to avoid false matches.
+        validated.flat_rate_providers = preferences.flat_rate_providers
+          .map((s: string) => s.trim())
+          .filter((s: string) => s.length > 0);
+      } else {
+        errors.push("flat_rate_providers must be an array of strings");
+      }
+    } else {
+      errors.push("flat_rate_providers must be an array of strings");
+    }
+  }
+
   // ─── Phase Skip Preferences ─────────────────────────────────────────
   if (preferences.phases !== undefined) {
     if (typeof preferences.phases === "object" && preferences.phases !== null) {
diff --git a/src/resources/extensions/gsd/tests/flat-rate-routing-guard.test.ts b/src/resources/extensions/gsd/tests/flat-rate-routing-guard.test.ts
index 64a93608f..791d095e0 100644
--- a/src/resources/extensions/gsd/tests/flat-rate-routing-guard.test.ts
+++ b/src/resources/extensions/gsd/tests/flat-rate-routing-guard.test.ts
@@ -6,7 +6,7 @@
 
 import { describe, test } from "node:test";
 import assert from "node:assert/strict";
-import { isFlatRateProvider, resolvePreferredModelConfig } from "../auto-model-selection.ts";
+import { buildFlatRateContext, isFlatRateProvider, resolvePreferredModelConfig } from "../auto-model-selection.ts";
 
 describe("flat-rate provider routing guard (#3453)", () => {
 
@@ -48,3 +48,150 @@ describe("flat-rate provider routing guard (#3453)", () => {
     assert.equal(result, undefined, "Should not create routing config for copilot");
   });
 });
+
+describe("flat-rate provider extensibility (any/all/custom)", () => {
+  test("regression: built-in providers still flat-rate with no context", () => {
+    assert.equal(isFlatRateProvider("github-copilot"), true);
+    assert.equal(isFlatRateProvider("copilot"), true);
+    assert.equal(isFlatRateProvider("claude-code"), true);
+  });
+
+  test("regression: non-flat-rate API providers return false with no context", () => {
+    assert.equal(isFlatRateProvider("anthropic"), false);
+    assert.equal(isFlatRateProvider("openai"), false);
+    assert.equal(isFlatRateProvider("google-vertex"), false);
+  });
+
+  test("auto-detection: externalCli auth mode marks provider flat-rate", () => {
+    // Any provider registered with authMode: "externalCli" is a local
+    // CLI wrapper around the user's subscription — every request costs
+    // the same regardless of model, so dynamic routing provides no benefit.
+    assert.equal(
+      isFlatRateProvider("my-private-cli", { authMode: "externalCli" }),
+      true,
+    );
+  });
+
+  test("auto-detection: non-externalCli auth modes do not mark provider flat-rate", () => {
+    assert.equal(
+      isFlatRateProvider("my-http-proxy", { authMode: "apiKey" }),
+      false,
+    );
+    assert.equal(
+      isFlatRateProvider("my-http-proxy", { authMode: "oauth" }),
+      false,
+    );
+    assert.equal(
+      isFlatRateProvider("my-http-proxy", { authMode: "none" }),
+      false,
+    );
+  });
+
+  test("user preference: custom provider listed in userFlatRate is flat-rate", () => {
+    assert.equal(
+      isFlatRateProvider("my-ollama-proxy", { userFlatRate: ["my-ollama-proxy"] }),
+      true,
+    );
+  });
+
+  test("user preference: case-insensitive match against userFlatRate list", () => {
+    assert.equal(
+      isFlatRateProvider("My-Proxy", { userFlatRate: ["my-proxy"] }),
+      true,
+    );
+    assert.equal(
+      isFlatRateProvider("my-proxy", { userFlatRate: ["MY-PROXY"] }),
+      true,
+    );
+  });
+
+  test("user preference: provider not in userFlatRate list is not flat-rate", () => {
+    assert.equal(
+      isFlatRateProvider("other-proxy", { userFlatRate: ["my-proxy"] }),
+      false,
+    );
+  });
+
+  test("combined signals: built-in list wins even when context is empty", () => {
+    assert.equal(
+      isFlatRateProvider("claude-code", { authMode: "apiKey", userFlatRate: [] }),
+      true,
+    );
+  });
+
+  test("combined signals: externalCli auto-detection wins alongside userFlatRate miss", () => {
+    assert.equal(
+      isFlatRateProvider("my-cli", {
+        authMode: "externalCli",
+        userFlatRate: ["a-different-cli"],
+      }),
+      true,
+    );
+  });
+});
+
+describe("buildFlatRateContext()", () => {
+  test("builds a context from ctx.modelRegistry.getProviderAuthMode + prefs", () => {
+    const ctx = {
+      modelRegistry: {
+        getProviderAuthMode: (p: string) =>
+          p === "my-cli" ? "externalCli" : "apiKey",
+      },
+    };
+    const prefs = { flat_rate_providers: ["my-proxy"] };
+
+    const ctxForCli = buildFlatRateContext("my-cli", ctx, prefs);
+    assert.equal(ctxForCli.authMode, "externalCli");
+    assert.deepEqual(ctxForCli.userFlatRate, ["my-proxy"]);
+    assert.equal(isFlatRateProvider("my-cli", ctxForCli), true);
+
+    const ctxForProxy = buildFlatRateContext("my-proxy", ctx, prefs);
+    assert.equal(ctxForProxy.authMode, "apiKey");
+    assert.equal(isFlatRateProvider("my-proxy", ctxForProxy), true);
+
+    const ctxForOther = buildFlatRateContext("anthropic", ctx, prefs);
+    assert.equal(ctxForOther.authMode, "apiKey");
+    assert.equal(isFlatRateProvider("anthropic", ctxForOther), false);
+  });
+
+  test("survives missing ctx and missing prefs", () => {
+    const empty = buildFlatRateContext("anything");
+    assert.equal(empty.authMode, undefined);
+    assert.equal(empty.userFlatRate, undefined);
+    assert.equal(isFlatRateProvider("anything", empty), false);
+  });
+
+  test("survives a registry lookup that throws", () => {
+    const ctx = {
+      modelRegistry: {
+        getProviderAuthMode: () => {
+          throw new Error("registry boom");
+        },
+      },
+    };
+    const result = buildFlatRateContext("anything", ctx);
+    // Error must be swallowed — authMode left undefined, function returns.
+    assert.equal(result.authMode, undefined);
+  });
+
+  test("registry returning a non-canonical auth mode is ignored", () => {
+    const ctx = {
+      modelRegistry: {
+        getProviderAuthMode: () => "weird-mode",
+      },
+    };
+    const result = buildFlatRateContext("anything", ctx);
+    assert.equal(result.authMode, undefined);
+  });
+
+  test("calls getProviderAuthMode as a method so `this` stays bound", () => {
+    const registry = {
+      modes: new Map([["my-cli", "externalCli"]]),
+      getProviderAuthMode(p: string): string {
+        return this.modes.get(p) ?? "apiKey";
+      },
+    };
+    const result = buildFlatRateContext("my-cli", { modelRegistry: registry });
+    assert.equal(result.authMode, "externalCli");
+  });
+});
diff --git a/src/resources/extensions/gsd/tests/preferences.test.ts b/src/resources/extensions/gsd/tests/preferences.test.ts
index fa08a857e..d9ed2f672 100644
--- a/src/resources/extensions/gsd/tests/preferences.test.ts
+++ b/src/resources/extensions/gsd/tests/preferences.test.ts
@@ -134,6 +134,53 @@ test("invalid value types produce errors and fall back to undefined", () => {
   }
 });
 
+test("flat_rate_providers: accepts string array", () => {
+  const { errors, preferences } = validatePreferences({
+    flat_rate_providers: ["my-proxy", "private-cli"],
+  });
+  assert.equal(errors.length, 0);
+  assert.deepEqual(preferences.flat_rate_providers, ["my-proxy", "private-cli"]);
+});
+
+test("flat_rate_providers: trims whitespace and drops empty entries", () => {
+  const { errors, preferences } = validatePreferences({
+    flat_rate_providers: [" my-proxy ", "", " ", "private-cli"],
+  });
+  assert.equal(errors.length, 0);
+  assert.deepEqual(preferences.flat_rate_providers, ["my-proxy", "private-cli"]);
+});
+
+test("flat_rate_providers: non-array rejected", () => {
+  const { errors } = validatePreferences({
+    flat_rate_providers: "my-proxy" as any,
+  });
+  assert.ok(
+    errors.some(e => e.includes("flat_rate_providers")),
+    "should error on non-array value",
+  );
+});
+
+test("flat_rate_providers: non-string elements rejected", () => {
+  const { errors } = validatePreferences({
+    flat_rate_providers: ["ok", 123 as any, "also-ok"],
+  });
+  assert.ok(
+    errors.some(e => e.includes("flat_rate_providers")),
+    "should error when array contains non-strings",
+  );
+});
+
+test("flat_rate_providers is a recognized preference key (no warning)", () => {
+  const { warnings } = validatePreferences({
+    flat_rate_providers: ["my-proxy"],
+  });
+  assert.equal(
+    warnings.filter(w => w.includes("flat_rate_providers")).length,
+    0,
+    "flat_rate_providers must be in KNOWN_PREFERENCE_KEYS",
+  );
+});
+
 test("valid values pass through correctly", () => {
   const { preferences: p1 } = validatePreferences({ budget_enforcement: "halt" });
   assert.equal(p1.budget_enforcement, "halt");