Merge pull request #3963 from jeremymcs/fix/3962-model-routing-transparency

fix(routing): skip dynamic routing for interactive dispatches (#3962)
2026-04-10 22:54:10 -05:00 · 2026-04-10 22:54:10 -05:00 · 1e6919d92b
commit 1e6919d92b
parent 19cbb17683 66710f2b28
4 changed files with 281 additions and 26 deletions
--- a/src/resources/extensions/gsd/auto-model-selection.ts
+++ b/src/resources/extensions/gsd/auto-model-selection.ts
@ -25,10 +25,17 @@ export interface ModelSelectionResult {
 export function resolvePreferredModelConfig(
  unitType: string,
  autoModeStartModel: { provider: string; id: string } | null,
+  /** When false, only return explicit per-phase model configs — do not
+   *  synthesize a routing ceiling from dynamic_routing.tier_models (#3962). */
+  isAutoMode = true,
 ) {
  const explicitConfig = resolveModelWithFallbacksForUnit(unitType);
  if (explicitConfig) return explicitConfig;

+  // In interactive mode, don't synthesize a routing-based model config.
+  // The user's session model (/model) should be used as-is (#3962).
+  if (!isAutoMode) return undefined;
+
  const routingConfig = resolveDynamicRoutingConfig();
  if (!routingConfig.enabled || !routingConfig.tier_models) return undefined;

@ -62,8 +69,11 @@ export async function selectAndApplyModel(
  verbose: boolean,
  autoModeStartModel: { provider: string; id: string } | null,
  retryContext?: { isRetry: boolean; previousTier?: string },
+  /** When false (interactive/guided-flow), skip dynamic routing and use the session model.
+   *  Dynamic routing only applies in auto-mode where cost optimization is expected. (#3962) */
+  isAutoMode = true,
 ): Promise<ModelSelectionResult> {
-  const modelConfig = resolvePreferredModelConfig(unitType, autoModeStartModel);
+  const modelConfig = resolvePreferredModelConfig(unitType, autoModeStartModel, isAutoMode);
  let routing: { tier: string; modelDowngraded: boolean } | null = null;
  let appliedModel: Model<Api> | null = null;

@ -71,7 +81,13 @@ export async function selectAndApplyModel(
    const availableModels = ctx.modelRegistry.getAvailable();

    // ─── Dynamic Model Routing ─────────────────────────────────────────
+    // Dynamic routing (complexity-based downgrading) only applies in auto-mode.
+    // Interactive/guided-flow dispatches use the user's session model directly,
+    // respecting their /model selection without silent downgrades (#3962).
    const routingConfig = resolveDynamicRoutingConfig();
+    if (!isAutoMode) {
+      routingConfig.enabled = false;
+    }
    let effectiveModelConfig = modelConfig;
    let routingTierLabel = "";

@ -123,12 +139,11 @@ export async function selectAndApplyModel(
          const escalated = escalateTier(retryContext.previousTier as ComplexityTier);
          if (escalated) {
            classification = { ...classification, tier: escalated, reason: "escalated after failure" };
-            if (verbose) {
-              ctx.ui.notify(
-                `Tier escalation: ${retryContext.previousTier} → ${escalated} (retry after failure)`,
-                "info",
-              );
-            }
+            // Always notify on tier escalation — model changes should be visible (#3962)
+            ctx.ui.notify(
+              `Tier escalation: ${retryContext.previousTier} → ${escalated} (retry after failure)`,
+              "info",
+            );
          }
        }

@ -195,24 +210,23 @@ export async function selectAndApplyModel(
            primary: routingResult.modelId,
            fallbacks: routingResult.fallbacks,
          };
-          if (verbose) {
-            if (routingResult.selectionMethod === "capability-scored" && routingResult.capabilityScores) {
-              // Verbose scoring breakdown for capability-scored decisions (D-20)
-              const tierLbl = tierLabel(classification.tier);
-              const scores = Object.entries(routingResult.capabilityScores)
-                .sort(([, a], [, b]) => b - a)
-                .map(([id, score]) => `${id}: ${score.toFixed(1)}`)
-                .join(", ");
-              ctx.ui.notify(
-                `Dynamic routing [${tierLbl}]: ${routingResult.modelId} (capability-scored) — ${scores}`,
-                "info",
-              );
-            } else {
-              ctx.ui.notify(
-                `Dynamic routing [${tierLabel(classification.tier)}]: ${routingResult.modelId} (${classification.reason})`,
-                "info",
-              );
-            }
+          // Always notify on model downgrade — users should see when their
+          // model selection is overridden, not just in verbose mode (#3962).
+          if (routingResult.selectionMethod === "capability-scored" && routingResult.capabilityScores) {
+            const tierLbl = tierLabel(classification.tier);
+            const scores = Object.entries(routingResult.capabilityScores)
+              .sort(([, a], [, b]) => b - a)
+              .map(([id, score]) => `${id}: ${score.toFixed(1)}`)
+              .join(", ");
+            ctx.ui.notify(
+              `Dynamic routing [${tierLbl}]: ${routingResult.modelId} (capability-scored) — ${scores}`,
+              "info",
+            );
+          } else {
+            ctx.ui.notify(
+              `Dynamic routing [${tierLabel(classification.tier)}]: ${routingResult.modelId} (${classification.reason})`,
+              "info",
+            );
          }
        }
        routingTierLabel = ` [${tierLabel(classification.tier)}]`;
--- a/src/resources/extensions/gsd/auto-start.ts
+++ b/src/resources/extensions/gsd/auto-start.ts
@ -83,7 +83,7 @@ import { join } from "node:path";
 import { sep as pathSep } from "node:path";

 import { resolveProjectRootDbPath } from "./bootstrap/dynamic-tools.js";
-import { resolveDefaultSessionModel } from "./preferences-models.js";
+import { resolveDefaultSessionModel, resolveDynamicRoutingConfig } from "./preferences-models.js";
 import type { WorktreeResolver } from "./worktree-resolver.js";

 export interface BootstrapDeps {
@ -778,6 +778,39 @@ export async function bootstrapAutoSession(
        : "Will loop until milestone complete.";
    ctx.ui.notify(`${modeLabel} started. ${scopeMsg}`, "info");

+    // Show dynamic routing status so users know upfront if models will be
+    // downgraded for simple tasks (#3962).
+    // Use the same effective logic as selectAndApplyModel: check flat-rate
+    // provider suppression and resolve the actual ceiling model.
+    const routingConfig = resolveDynamicRoutingConfig();
+    const startModelLabel = s.autoModeStartModel
+      ? `${s.autoModeStartModel.provider}/${s.autoModeStartModel.id}`
+      : ctx.model ? `${ctx.model.provider}/${ctx.model.id}` : "default";
+
+    // Flat-rate providers (e.g. GitHub Copilot, claude-code) suppress routing
+    // at dispatch time (#3453) — reflect that in the banner.
+    const { isFlatRateProvider } = await import("./auto-model-selection.js");
+    const effectiveProvider = s.autoModeStartModel?.provider ?? ctx.model?.provider;
+    const effectivelyEnabled = routingConfig.enabled
+      && !(effectiveProvider && isFlatRateProvider(effectiveProvider));
+
+    // The actual ceiling may come from tier_models.heavy, not the start model.
+    const effectiveCeiling = (routingConfig.enabled && routingConfig.tier_models?.heavy)
+      ? routingConfig.tier_models.heavy
+      : startModelLabel;
+
+    if (effectivelyEnabled) {
+      ctx.ui.notify(
+        `Dynamic routing: enabled — simple tasks may use cheaper models (ceiling: ${effectiveCeiling})`,
+        "info",
+      );
+    } else {
+      ctx.ui.notify(
+        `Dynamic routing: disabled — all tasks will use ${startModelLabel}`,
+        "info",
+      );
+    }
+
    updateSessionLock(
      lockBase(),
      "starting",
--- a/src/resources/extensions/gsd/guided-flow.ts
+++ b/src/resources/extensions/gsd/guided-flow.ts
@ -295,6 +295,7 @@ async function dispatchWorkflow(
    const result = await selectAndApplyModel(
      ctx, pi, unitType, /* unitId */ "", /* basePath */ process.cwd(),
      prefs, /* verbose */ false, /* autoModeStartModel */ null,
+      /* retryContext */ undefined, /* isAutoMode */ false,
    );
    if (result.appliedModel) {
      debugLog("guided-flow-model-applied", {
--- a/src/resources/extensions/gsd/tests/interactive-routing-bypass.test.ts
+++ b/src/resources/extensions/gsd/tests/interactive-routing-bypass.test.ts
@ -0,0 +1,207 @@
+// GSD Extension — Interactive Routing Bypass Tests
+// Verifies that dynamic routing is skipped for interactive (guided-flow) dispatches
+// and that model downgrade notifications always fire (#3962).
+// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
+
+import test, { describe } from "node:test";
+import assert from "node:assert/strict";
+import { readFileSync } from "node:fs";
+import { join, dirname } from "node:path";
+import { fileURLToPath } from "node:url";
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+// ─── Source-level structural tests ──────────────────────────────────────────
+
+const modelSelectionSrc = readFileSync(
+  join(__dirname, "..", "auto-model-selection.ts"),
+  "utf-8",
+);
+
+const guidedFlowSrc = readFileSync(
+  join(__dirname, "..", "guided-flow.ts"),
+  "utf-8",
+);
+
+const autoStartSrc = readFileSync(
+  join(__dirname, "..", "auto-start.ts"),
+  "utf-8",
+);
+
+describe("interactive routing bypass (#3962)", () => {
+  test("selectAndApplyModel accepts isAutoMode parameter", () => {
+    // The function signature should include isAutoMode with a default of true
+    assert.ok(
+      modelSelectionSrc.includes("isAutoMode"),
+      "selectAndApplyModel should have isAutoMode parameter",
+    );
+    assert.ok(
+      modelSelectionSrc.includes("isAutoMode = true"),
+      "isAutoMode should default to true (auto-mode behavior preserved)",
+    );
+  });
+
+  test("routing is disabled when isAutoMode is false", () => {
+    // The code should disable routing when not in auto-mode
+    assert.ok(
+      modelSelectionSrc.includes("if (!isAutoMode)"),
+      "should check isAutoMode flag to disable routing",
+    );
+    assert.ok(
+      modelSelectionSrc.includes("routingConfig.enabled = false"),
+      "should set routingConfig.enabled = false for interactive mode",
+    );
+  });
+
+  test("resolvePreferredModelConfig skips routing synthesis when isAutoMode is false", () => {
+    // resolvePreferredModelConfig should accept isAutoMode and bail early
+    // before synthesizing a routing ceiling from tier_models (#3962 codex review)
+    assert.ok(
+      modelSelectionSrc.includes("function resolvePreferredModelConfig"),
+      "resolvePreferredModelConfig should exist",
+    );
+    // The function should check isAutoMode before routing synthesis
+    const fnIdx = modelSelectionSrc.indexOf("function resolvePreferredModelConfig");
+    const fnBody = modelSelectionSrc.slice(fnIdx, fnIdx + 600);
+    assert.ok(
+      fnBody.includes("isAutoMode"),
+      "resolvePreferredModelConfig should accept isAutoMode parameter",
+    );
+    assert.ok(
+      fnBody.includes("if (!isAutoMode) return undefined"),
+      "should return undefined (skip routing synthesis) when not in auto-mode",
+    );
+  });
+
+  test("selectAndApplyModel threads isAutoMode to resolvePreferredModelConfig", () => {
+    // The call to resolvePreferredModelConfig inside selectAndApplyModel
+    // should pass isAutoMode as the third argument
+    const callSite = "resolvePreferredModelConfig(unitType, autoModeStartModel, isAutoMode)";
+    assert.ok(
+      modelSelectionSrc.includes(callSite),
+      "selectAndApplyModel should pass isAutoMode to resolvePreferredModelConfig",
+    );
+  });
+
+  test("guided-flow passes isAutoMode=false", () => {
+    // guided-flow.ts should explicitly pass isAutoMode as false
+    assert.ok(
+      guidedFlowSrc.includes("/* isAutoMode */ false"),
+      "guided-flow should pass isAutoMode=false to selectAndApplyModel",
+    );
+  });
+
+  test("auto/phases.ts does NOT pass isAutoMode=false", () => {
+    // auto/phases.ts should use the default (true) — it's auto-mode
+    const phasesSrc = readFileSync(
+      join(__dirname, "..", "auto", "phases.ts"),
+      "utf-8",
+    );
+    assert.ok(
+      !phasesSrc.includes("isAutoMode"),
+      "auto/phases.ts should use default isAutoMode=true (not pass it explicitly)",
+    );
+  });
+});
+
+describe("model downgrade notifications always visible (#3962)", () => {
+  test("downgrade notification is not gated by verbose flag", () => {
+    // The downgrade notification block should NOT be wrapped in `if (verbose)`
+    // Find the downgrade block and verify it's not behind a verbose check
+    const downgradeBlock = "if (routingResult.wasDowngraded)";
+    const downgradeIdx = modelSelectionSrc.indexOf(downgradeBlock);
+    assert.ok(downgradeIdx > 0, "downgrade block should exist");
+
+    // Extract the code between wasDowngraded check and the next routing label assignment
+    const afterDowngrade = modelSelectionSrc.slice(
+      downgradeIdx,
+      modelSelectionSrc.indexOf("routingTierLabel =", downgradeIdx),
+    );
+
+    // The notification calls should NOT be wrapped in `if (verbose)`
+    assert.ok(
+      !afterDowngrade.includes("if (verbose)"),
+      "downgrade notifications should not be gated by verbose flag",
+    );
+
+    // But the notification calls should exist
+    assert.ok(
+      afterDowngrade.includes('ctx.ui.notify('),
+      "downgrade notifications should still fire",
+    );
+  });
+
+  test("tier escalation notification is not gated by verbose flag", () => {
+    // Extract the escalation block: from "if (escalated)" to its closing
+    // and verify the notification is present but `if (verbose)` is not.
+    const escalatedIdx = modelSelectionSrc.indexOf("if (escalated)");
+    assert.ok(escalatedIdx > 0, "escalation block should exist");
+
+    // Get the block from "if (escalated)" to the next closing brace pattern
+    const block = modelSelectionSrc.slice(escalatedIdx, escalatedIdx + 400);
+    assert.ok(
+      block.includes("Tier escalation:"),
+      "escalation block should contain the notification",
+    );
+    assert.ok(
+      !block.includes("if (verbose)"),
+      "escalation block should not gate notification behind verbose flag",
+    );
+  });
+});
+
+describe("auto-mode start routing banner (#3962)", () => {
+  test("auto-start shows dynamic routing status on startup", () => {
+    assert.ok(
+      autoStartSrc.includes("Dynamic routing:"),
+      "auto-start should display routing status banner",
+    );
+    assert.ok(
+      autoStartSrc.includes("resolveDynamicRoutingConfig"),
+      "auto-start should import resolveDynamicRoutingConfig",
+    );
+  });
+
+  test("banner shows different messages for enabled vs disabled routing", () => {
+    assert.ok(
+      autoStartSrc.includes("Dynamic routing: enabled"),
+      "should show message when routing is enabled",
+    );
+    assert.ok(
+      autoStartSrc.includes("Dynamic routing: disabled"),
+      "should show message when routing is disabled",
+    );
+  });
+
+  test("banner shows the ceiling model", () => {
+    assert.ok(
+      autoStartSrc.includes("startModelLabel"),
+      "banner should reference the start/ceiling model",
+    );
+  });
+
+  test("banner accounts for flat-rate provider suppression", () => {
+    // The banner should check isFlatRateProvider to accurately reflect
+    // whether routing will actually be active at dispatch time (#3962 codex review)
+    assert.ok(
+      autoStartSrc.includes("isFlatRateProvider"),
+      "banner should check flat-rate provider status",
+    );
+    assert.ok(
+      autoStartSrc.includes("effectivelyEnabled"),
+      "banner should compute effective routing state, not just raw config",
+    );
+  });
+
+  test("banner uses effective ceiling from tier_models.heavy when configured", () => {
+    // The actual ceiling may come from tier_models.heavy, not the start model
+    assert.ok(
+      autoStartSrc.includes("tier_models?.heavy"),
+      "banner should check tier_models.heavy for the effective ceiling",
+    );
+    assert.ok(
+      autoStartSrc.includes("effectiveCeiling"),
+      "banner should compute the effective ceiling model",
+    );
+  });
+});