diff --git a/src/resources/extensions/gsd/auto-model-selection.ts b/src/resources/extensions/gsd/auto-model-selection.ts index 1097964f2..ce33bda61 100644 --- a/src/resources/extensions/gsd/auto-model-selection.ts +++ b/src/resources/extensions/gsd/auto-model-selection.ts @@ -25,10 +25,17 @@ export interface ModelSelectionResult { export function resolvePreferredModelConfig( unitType: string, autoModeStartModel: { provider: string; id: string } | null, + /** When false, only return explicit per-phase model configs — do not + * synthesize a routing ceiling from dynamic_routing.tier_models (#3962). */ + isAutoMode = true, ) { const explicitConfig = resolveModelWithFallbacksForUnit(unitType); if (explicitConfig) return explicitConfig; + // In interactive mode, don't synthesize a routing-based model config. + // The user's session model (/model) should be used as-is (#3962). + if (!isAutoMode) return undefined; + const routingConfig = resolveDynamicRoutingConfig(); if (!routingConfig.enabled || !routingConfig.tier_models) return undefined; @@ -62,8 +69,11 @@ export async function selectAndApplyModel( verbose: boolean, autoModeStartModel: { provider: string; id: string } | null, retryContext?: { isRetry: boolean; previousTier?: string }, + /** When false (interactive/guided-flow), skip dynamic routing and use the session model. + * Dynamic routing only applies in auto-mode where cost optimization is expected. 
(#3962) */ + isAutoMode = true, ): Promise { - const modelConfig = resolvePreferredModelConfig(unitType, autoModeStartModel); + const modelConfig = resolvePreferredModelConfig(unitType, autoModeStartModel, isAutoMode); let routing: { tier: string; modelDowngraded: boolean } | null = null; let appliedModel: Model | null = null; @@ -71,7 +81,13 @@ export async function selectAndApplyModel( const availableModels = ctx.modelRegistry.getAvailable(); // ─── Dynamic Model Routing ───────────────────────────────────────── + // Dynamic routing (complexity-based downgrading) only applies in auto-mode. + // Interactive/guided-flow dispatches use the user's session model directly, + // respecting their /model selection without silent downgrades (#3962). const routingConfig = resolveDynamicRoutingConfig(); + if (!isAutoMode) { + routingConfig.enabled = false; + } let effectiveModelConfig = modelConfig; let routingTierLabel = ""; @@ -123,12 +139,11 @@ export async function selectAndApplyModel( const escalated = escalateTier(retryContext.previousTier as ComplexityTier); if (escalated) { classification = { ...classification, tier: escalated, reason: "escalated after failure" }; - if (verbose) { - ctx.ui.notify( - `Tier escalation: ${retryContext.previousTier} → ${escalated} (retry after failure)`, - "info", - ); - } + // Always notify on tier escalation — model changes should be visible (#3962) + ctx.ui.notify( + `Tier escalation: ${retryContext.previousTier} → ${escalated} (retry after failure)`, + "info", + ); } } @@ -195,24 +210,23 @@ export async function selectAndApplyModel( primary: routingResult.modelId, fallbacks: routingResult.fallbacks, }; - if (verbose) { - if (routingResult.selectionMethod === "capability-scored" && routingResult.capabilityScores) { - // Verbose scoring breakdown for capability-scored decisions (D-20) - const tierLbl = tierLabel(classification.tier); - const scores = Object.entries(routingResult.capabilityScores) - .sort(([, a], [, b]) => b - a) - 
.map(([id, score]) => `${id}: ${score.toFixed(1)}`) - .join(", "); - ctx.ui.notify( - `Dynamic routing [${tierLbl}]: ${routingResult.modelId} (capability-scored) — ${scores}`, - "info", - ); - } else { - ctx.ui.notify( - `Dynamic routing [${tierLabel(classification.tier)}]: ${routingResult.modelId} (${classification.reason})`, - "info", - ); - } + // Always notify on model downgrade — users should see when their + // model selection is overridden, not just in verbose mode (#3962). + if (routingResult.selectionMethod === "capability-scored" && routingResult.capabilityScores) { + const tierLbl = tierLabel(classification.tier); + const scores = Object.entries(routingResult.capabilityScores) + .sort(([, a], [, b]) => b - a) + .map(([id, score]) => `${id}: ${score.toFixed(1)}`) + .join(", "); + ctx.ui.notify( + `Dynamic routing [${tierLbl}]: ${routingResult.modelId} (capability-scored) — ${scores}`, + "info", + ); + } else { + ctx.ui.notify( + `Dynamic routing [${tierLabel(classification.tier)}]: ${routingResult.modelId} (${classification.reason})`, + "info", + ); } } routingTierLabel = ` [${tierLabel(classification.tier)}]`; diff --git a/src/resources/extensions/gsd/auto-start.ts b/src/resources/extensions/gsd/auto-start.ts index 3f737c638..159916abd 100644 --- a/src/resources/extensions/gsd/auto-start.ts +++ b/src/resources/extensions/gsd/auto-start.ts @@ -83,7 +83,7 @@ import { join } from "node:path"; import { sep as pathSep } from "node:path"; import { resolveProjectRootDbPath } from "./bootstrap/dynamic-tools.js"; -import { resolveDefaultSessionModel } from "./preferences-models.js"; +import { resolveDefaultSessionModel, resolveDynamicRoutingConfig } from "./preferences-models.js"; import type { WorktreeResolver } from "./worktree-resolver.js"; export interface BootstrapDeps { @@ -778,6 +778,39 @@ export async function bootstrapAutoSession( : "Will loop until milestone complete."; ctx.ui.notify(`${modeLabel} started. 
${scopeMsg}`, "info"); + // Show dynamic routing status so users know upfront if models will be + // downgraded for simple tasks (#3962). + // Use the same effective logic as selectAndApplyModel: check flat-rate + // provider suppression and resolve the actual ceiling model. + const routingConfig = resolveDynamicRoutingConfig(); + const startModelLabel = s.autoModeStartModel + ? `${s.autoModeStartModel.provider}/${s.autoModeStartModel.id}` + : ctx.model ? `${ctx.model.provider}/${ctx.model.id}` : "default"; + + // Flat-rate providers (e.g. GitHub Copilot, claude-code) suppress routing + // at dispatch time (#3453) — reflect that in the banner. + const { isFlatRateProvider } = await import("./auto-model-selection.js"); + const effectiveProvider = s.autoModeStartModel?.provider ?? ctx.model?.provider; + const effectivelyEnabled = routingConfig.enabled + && !(effectiveProvider && isFlatRateProvider(effectiveProvider)); + + // The actual ceiling may come from tier_models.heavy, not the start model. + const effectiveCeiling = (routingConfig.enabled && routingConfig.tier_models?.heavy) + ? 
routingConfig.tier_models.heavy + : startModelLabel; + + if (effectivelyEnabled) { + ctx.ui.notify( + `Dynamic routing: enabled — simple tasks may use cheaper models (ceiling: ${effectiveCeiling})`, + "info", + ); + } else { + ctx.ui.notify( + `Dynamic routing: disabled — all tasks will use ${startModelLabel}`, + "info", + ); + } + updateSessionLock( lockBase(), "starting", diff --git a/src/resources/extensions/gsd/guided-flow.ts b/src/resources/extensions/gsd/guided-flow.ts index 6a0122188..79ce5b631 100644 --- a/src/resources/extensions/gsd/guided-flow.ts +++ b/src/resources/extensions/gsd/guided-flow.ts @@ -295,6 +295,7 @@ async function dispatchWorkflow( const result = await selectAndApplyModel( ctx, pi, unitType, /* unitId */ "", /* basePath */ process.cwd(), prefs, /* verbose */ false, /* autoModeStartModel */ null, + /* retryContext */ undefined, /* isAutoMode */ false, ); if (result.appliedModel) { debugLog("guided-flow-model-applied", { diff --git a/src/resources/extensions/gsd/tests/interactive-routing-bypass.test.ts b/src/resources/extensions/gsd/tests/interactive-routing-bypass.test.ts new file mode 100644 index 000000000..f209ecc8d --- /dev/null +++ b/src/resources/extensions/gsd/tests/interactive-routing-bypass.test.ts @@ -0,0 +1,207 @@ +// GSD Extension — Interactive Routing Bypass Tests +// Verifies that dynamic routing is skipped for interactive (guided-flow) dispatches +// and that model downgrade notifications always fire (#3962). 
+// Copyright (c) 2026 Jeremy McSpadden
+
+import test, { describe } from "node:test";
+import assert from "node:assert/strict";
+import { readFileSync } from "node:fs";
+import { join, dirname } from "node:path";
+import { fileURLToPath } from "node:url";
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+// ─── Source-level structural tests ──────────────────────────────────────────
+
+const modelSelectionSrc = readFileSync(
+  join(__dirname, "..", "auto-model-selection.ts"),
+  "utf-8",
+);
+
+const guidedFlowSrc = readFileSync(
+  join(__dirname, "..", "guided-flow.ts"),
+  "utf-8",
+);
+
+const autoStartSrc = readFileSync(
+  join(__dirname, "..", "auto-start.ts"),
+  "utf-8",
+);
+
+describe("interactive routing bypass (#3962)", () => {
+  test("selectAndApplyModel accepts isAutoMode parameter", () => {
+    // The function signature should include isAutoMode with a default of true
+    assert.ok(
+      modelSelectionSrc.includes("isAutoMode"),
+      "selectAndApplyModel should have isAutoMode parameter",
+    );
+    assert.ok(
+      modelSelectionSrc.includes("isAutoMode = true"),
+      "isAutoMode should default to true (auto-mode behavior preserved)",
+    );
+  });
+
+  test("routing is disabled when isAutoMode is false", () => {
+    // The code should disable routing when not in auto-mode
+    assert.ok(
+      modelSelectionSrc.includes("if (!isAutoMode)"),
+      "should check isAutoMode flag to disable routing",
+    );
+    assert.ok(
+      modelSelectionSrc.includes("routingConfig.enabled = false"),
+      "should set routingConfig.enabled = false for interactive mode",
+    );
+  });
+
+  test("resolvePreferredModelConfig skips routing synthesis when isAutoMode is false", () => {
+    // resolvePreferredModelConfig should accept isAutoMode and bail early
+    // before synthesizing a routing ceiling from tier_models (#3962 codex review)
+    assert.ok(
+      modelSelectionSrc.includes("function resolvePreferredModelConfig"),
+      "resolvePreferredModelConfig should exist",
+    );
+    // Inspect a fixed 600-character window from the function header onward
+    const fnIdx = modelSelectionSrc.indexOf("function resolvePreferredModelConfig");
+    const fnBody = modelSelectionSrc.slice(fnIdx, fnIdx + 600);
+    assert.ok(
+      fnBody.includes("isAutoMode"),
+      "resolvePreferredModelConfig should accept isAutoMode parameter",
+    );
+    assert.ok(
+      fnBody.includes("if (!isAutoMode) return undefined"),
+      "should return undefined (skip routing synthesis) when not in auto-mode",
+    );
+  });
+
+  test("selectAndApplyModel threads isAutoMode to resolvePreferredModelConfig", () => {
+    // The call to resolvePreferredModelConfig inside selectAndApplyModel
+    // should pass isAutoMode as the third argument
+    const callSite = "resolvePreferredModelConfig(unitType, autoModeStartModel, isAutoMode)";
+    assert.ok(
+      modelSelectionSrc.includes(callSite),
+      "selectAndApplyModel should pass isAutoMode to resolvePreferredModelConfig",
+    );
+  });
+
+  test("guided-flow passes isAutoMode=false", () => {
+    // guided-flow.ts should explicitly pass isAutoMode as false
+    assert.ok(
+      guidedFlowSrc.includes("/* isAutoMode */ false"),
+      "guided-flow should pass isAutoMode=false to selectAndApplyModel",
+    );
+  });
+
+  test("auto/phases.ts does NOT pass isAutoMode=false", () => {
+    // auto/phases.ts should use the default (true) — it's auto-mode
+    const phasesSrc = readFileSync(
+      join(__dirname, "..", "auto", "phases.ts"),
+      "utf-8",
+    );
+    assert.ok(
+      !phasesSrc.includes("isAutoMode"),
+      "auto/phases.ts should use default isAutoMode=true (not pass it explicitly)",
+    );
+  });
+});
+
+describe("model downgrade notifications always visible (#3962)", () => {
+  test("downgrade notification is not gated by verbose flag", () => {
+    // The downgrade notification block should NOT be wrapped in `if (verbose)`
+    // Find the downgrade block and verify it's not behind a verbose check
+    const downgradeBlock = "if (routingResult.wasDowngraded)";
+    const downgradeIdx = modelSelectionSrc.indexOf(downgradeBlock);
+    assert.notStrictEqual(downgradeIdx, -1, "downgrade block should exist");
+
+    // Extract the code between the wasDowngraded check and the next routing label assignment.
+    // Assert the end marker exists first: slice(start, -1) would silently scan most of the file.
+    const labelIdx = modelSelectionSrc.indexOf("routingTierLabel =", downgradeIdx);
+    assert.notStrictEqual(labelIdx, -1, "routingTierLabel assignment should follow the downgrade block");
+    const afterDowngrade = modelSelectionSrc.slice(downgradeIdx, labelIdx);
+
+    // The notification calls should NOT be wrapped in `if (verbose)`
+    assert.ok(
+      !afterDowngrade.includes("if (verbose)"),
+      "downgrade notifications should not be gated by verbose flag",
+    );
+
+    // But the notification calls should exist
+    assert.ok(
+      afterDowngrade.includes('ctx.ui.notify('),
+      "downgrade notifications should still fire",
+    );
+  });
+
+  test("tier escalation notification is not gated by verbose flag", () => {
+    // Inspect the code that follows "if (escalated)" and verify the
+    // notification is present but `if (verbose)` is not.
+    const escalatedIdx = modelSelectionSrc.indexOf("if (escalated)");
+    assert.notStrictEqual(escalatedIdx, -1, "escalation block should exist");
+
+    // Approximate the block with a fixed 400-character window starting at "if (escalated)"
+    const block = modelSelectionSrc.slice(escalatedIdx, escalatedIdx + 400);
+    assert.ok(
+      block.includes("Tier escalation:"),
+      "escalation block should contain the notification",
+    );
+    assert.ok(
+      !block.includes("if (verbose)"),
+      "escalation block should not gate notification behind verbose flag",
+    );
+  });
+});
+
+describe("auto-mode start routing banner (#3962)", () => {
+  test("auto-start shows dynamic routing status on startup", () => {
+    assert.ok(
+      autoStartSrc.includes("Dynamic routing:"),
+      "auto-start should display routing status banner",
+    );
+    assert.ok(
+      autoStartSrc.includes("resolveDynamicRoutingConfig"),
+      "auto-start should import resolveDynamicRoutingConfig",
+    );
+  });
+
+  test("banner shows different messages for enabled vs disabled routing", () => {
+    assert.ok(
+      autoStartSrc.includes("Dynamic routing: enabled"),
+      "should show message when routing is enabled",
+    );
+    assert.ok(
+      autoStartSrc.includes("Dynamic routing: disabled"),
+      "should show message when routing is disabled",
+    );
+  });
+
+  test("banner shows the ceiling model", () => {
+    assert.ok(
+      autoStartSrc.includes("startModelLabel"),
+      "banner should reference the start/ceiling model",
+    );
+  });
+
+  test("banner accounts for flat-rate provider suppression", () => {
+    // The banner should check isFlatRateProvider to accurately reflect
+    // whether routing will actually be active at dispatch time (#3962 codex review)
+    assert.ok(
+      autoStartSrc.includes("isFlatRateProvider"),
+      "banner should check flat-rate provider status",
+    );
+    assert.ok(
+      autoStartSrc.includes("effectivelyEnabled"),
+      "banner should compute effective routing state, not just raw config",
+    );
+  });
+
+  test("banner uses effective ceiling from tier_models.heavy when configured", () => {
+    // The actual ceiling may come from tier_models.heavy, not the start model
+    assert.ok(
+      autoStartSrc.includes("tier_models?.heavy"),
+      "banner should check tier_models.heavy for the effective ceiling",
+    );
+    assert.ok(
+      autoStartSrc.includes("effectiveCeiling"),
+      "banner should compute the effective ceiling model",
+    );
+  });
+});