Merge pull request #135 from Jamie-BitFlight/feat/model-fallbacks

feat: add model fallback support for auto-mode phases
2026-03-12 22:24:13 -06:00 · 2026-03-12 22:24:13 -06:00 · 18348e2103
commit 18348e2103
parent 7f2ba0e839 f1cf77a738
3 changed files with 156 additions and 17 deletions
--- a/src/resources/extensions/gsd/auto.ts
+++ b/src/resources/extensions/gsd/auto.ts
@ -40,7 +40,7 @@ import {
  readUnitRuntimeRecord,
  writeUnitRuntimeRecord,
 } from "./unit-runtime.js";
-import { resolveAutoSupervisorConfig, resolveModelForUnit, resolveSkillDiscoveryMode, loadEffectiveGSDPreferences } from "./preferences.js";
+import { resolveAutoSupervisorConfig, resolveModelForUnit, resolveModelWithFallbacksForUnit, resolveSkillDiscoveryMode, loadEffectiveGSDPreferences } from "./preferences.js";
 import type { GSDPreferences } from "./preferences.js";
 import {
  validatePlanBoundary,
@ -1395,17 +1395,50 @@ async function dispatchNextUnit(
  }

  // Switch model if preferences specify one for this unit type
-  const preferredModelId = resolveModelForUnit(unitType);
-  if (preferredModelId) {
-    // Try to find the model across all providers
+  // Try primary model, then fallbacks in order if setting fails
+  const modelConfig = resolveModelWithFallbacksForUnit(unitType);
+  if (modelConfig) {
    const allModels = ctx.modelRegistry.getAll();
-    const model = allModels.find(m => m.id === preferredModelId);
-    if (model) {
+    const modelsToTry = [modelConfig.primary, ...modelConfig.fallbacks];
+    let modelSet = false;
+
+    for (const modelId of modelsToTry) {
+      const model = allModels.find(m => m.id === modelId);
+      if (!model) {
+        ctx.ui.notify(`Model ${modelId} not found in registry, trying fallback.`, "warning");
+        continue;
+      }
+
      const ok = await pi.setModel(model, { persist: false });
      if (ok) {
-        ctx.ui.notify(`Model: ${preferredModelId}`, "info");
+        const fallbackNote = modelId === modelConfig.primary
+          ? ""
+          : ` (fallback from ${modelConfig.primary})`;
+        ctx.ui.notify(`Model: ${modelId}${fallbackNote}`, "info");
+        modelSet = true;
+        break;
+      } else {
+        const nextModel = modelsToTry[modelsToTry.indexOf(modelId) + 1];
+        if (nextModel) {
+          ctx.ui.notify(
+            `Failed to set model ${modelId}, trying fallback ${nextModel}...`,
+            "warning",
+          );
+        } else {
+          ctx.ui.notify(
+            `Failed to set model ${modelId} and all fallbacks exhausted. Using default model.`,
+            "warning",
+          );
+        }
      }
    }
+
+    if (!modelSet) {
+      ctx.ui.notify(
+        `Could not set any preferred model for ${unitType}. Continuing with default.`,
+        "warning",
+      );
+    }
  }

  // Start progress-aware supervision: a soft warning, an idle watchdog, and
--- a/src/resources/extensions/gsd/docs/preferences-reference.md
+++ b/src/resources/extensions/gsd/docs/preferences-reference.md
@ -27,7 +27,10 @@ Full documentation for `~/.gsd/preferences.md` (global) and `.gsd/preferences.md

 - `custom_instructions`: extra durable instructions related to skill use.

- `models`: per-stage model selection for auto-mode. Keys: `research`, `planning`, `execution`, `completion`. Values: model IDs (e.g. `claude-sonnet-4-6`, `claude-opus-4-6`). Omit a key to use whatever model is currently active.
+- `models`: per-stage model selection for auto-mode. Keys: `research`, `planning`, `execution`, `completion`. Values can be:
+  - Simple string: `"claude-sonnet-4-6"` — single model, no fallbacks
+  - Object with fallbacks: `{ model: "claude-opus-4-6", fallbacks: ["glm-5", "minimax-m2.5"] }` — tries fallbacks in order if primary fails
+  - Omit a key to use whatever model is currently active. Fallbacks are tried when model switching fails (provider unavailable, rate limited, etc.).

 - `skill_discovery`: controls how GSD discovers and applies skills during auto-mode. Valid values:
  - `auto` — skills are found and applied automatically without prompting.
@ -75,6 +78,47 @@ models:

 Opus for planning (where architectural decisions matter most), Sonnet for everything else (faster, cheaper). Omit any key to use the currently selected model.

+## Models with Fallbacks Example
+
+```yaml
+---
+version: 1
+models:
+  research:
+    model: openrouter/deepseek/deepseek-r1
+    fallbacks:
+      - openrouter/minimax/minimax-m2.5
+  planning:
+    model: claude-opus-4-6
+    fallbacks:
+      - openrouter/z-ai/glm-5
+      - openrouter/moonshotai/kimi-k2.5
+  execution:
+    model: openrouter/z-ai/glm-5
+    fallbacks:
+      - openrouter/minimax/minimax-m2.5
+  completion: openrouter/minimax/minimax-m2.5
+---
+```
+
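+When switching to a model fails (provider unavailable, rate limited, credits exhausted) or the model ID is not found in the model registry, GSD automatically tries the next model in the `fallbacks` list. If none of the configured models can be set, auto-mode continues without switching models. This keeps auto-mode running even when your preferred provider hits limits.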
+
+**Cost-optimized example** — use cheap models for most phases, and reserve an expensive model (with a cheaper fallback) for the critical planning phase:
+
+```yaml
+---
+version: 1
+models:
+  research: openrouter/deepseek/deepseek-r1  # $0.28/$0.42 per 1M tokens
+  planning:
+    model: claude-opus-4-6                   # $5/$25 — best for architecture
+    fallbacks:
+      - openrouter/z-ai/glm-5                # $1/$3.20 — strong alternative
+  execution: openrouter/minimax/minimax-m2.5 # $0.30/$1.20 — cheapest quality
+  completion: openrouter/minimax/minimax-m2.5
+---
+```
+
 ---

 ## Example Variations
--- a/src/resources/extensions/gsd/preferences.ts
+++ b/src/resources/extensions/gsd/preferences.ts
@ -17,11 +17,43 @@ export interface GSDSkillRule {
  avoid?: string[];
 }

+/**
+ * Model configuration for a single phase.
+ * Supports primary model with optional fallbacks for resilience.
+ */
+export interface GSDPhaseModelConfig {
+  /** Primary model ID (e.g., "claude-opus-4-6") */
+  model: string;
+  /** Fallback models to try in order if primary fails (e.g., rate limits, credits exhausted) */
+  fallbacks?: string[];
+}
+
+/**
+ * Legacy model config — simple string per phase.
+ * Kept for backward compatibility; plain string values are normalized to a primary model with no fallbacks when resolved.
+ */
 export interface GSDModelConfig {
-  research?: string;   // e.g. "claude-sonnet-4-6"
-  planning?: string;   // e.g. "claude-opus-4-6"
-  execution?: string;  // e.g. "claude-sonnet-4-6"
-  completion?: string; // e.g. "claude-sonnet-4-6"
+  research?: string;
+  planning?: string;
+  execution?: string;
+  completion?: string;
+}
+
+/**
+ * Extended model config with per-phase fallback support.
+ * Each phase can specify a primary model and ordered fallbacks.
+ */
+export interface GSDModelConfigV2 {
+  research?: string | GSDPhaseModelConfig;
+  planning?: string | GSDPhaseModelConfig;
+  execution?: string | GSDPhaseModelConfig;
+  completion?: string | GSDPhaseModelConfig;
+}
+
+/** Normalized model selection with resolved fallbacks */
+export interface ResolvedModelConfig {
+  primary: string;
+  fallbacks: string[];
 }

 export type SkillDiscoveryMode = "auto" | "suggest" | "off";
@ -464,26 +496,56 @@ export function resolveSkillDiscoveryMode(): SkillDiscoveryMode {
 * Returns undefined if no model preference is set for this unit type.
 */
 export function resolveModelForUnit(unitType: string): string | undefined {
+  const resolved = resolveModelWithFallbacksForUnit(unitType);
+  return resolved?.primary;
+}
+
+/**
+ * Resolve model and fallbacks for a given auto-mode unit type.
+ * Returns the primary model and ordered fallbacks, or undefined if not configured.
+ *
+ * Supports both legacy string format and extended object format:
+ * - Legacy: `planning: claude-opus-4-6`
+ * - Extended: `planning: { model: claude-opus-4-6, fallbacks: [glm-5, minimax-m2.5] }`
+ */
+export function resolveModelWithFallbacksForUnit(unitType: string): ResolvedModelConfig | undefined {
  const prefs = loadEffectiveGSDPreferences();
  if (!prefs?.preferences.models) return undefined;
-  const m = prefs.preferences.models;
+  const m = prefs.preferences.models as GSDModelConfigV2;

+  let phaseConfig: string | GSDPhaseModelConfig | undefined;
  switch (unitType) {
    case "research-milestone":
    case "research-slice":
-      return m.research;
+      phaseConfig = m.research;
+      break;
    case "plan-milestone":
    case "plan-slice":
    case "replan-slice":
-      return m.planning;
+      phaseConfig = m.planning;
+      break;
    case "execute-task":
-      return m.execution;
+      phaseConfig = m.execution;
+      break;
    case "complete-slice":
    case "run-uat":
-      return m.completion;
+      phaseConfig = m.completion;
+      break;
    default:
      return undefined;
  }
+
+  if (!phaseConfig) return undefined;
+
+  // Normalize: string -> { primary, fallbacks: [] }
+  if (typeof phaseConfig === "string") {
+    return { primary: phaseConfig, fallbacks: [] };
+  }
+
+  return {
+    primary: phaseConfig.model,
+    fallbacks: phaseConfig.fallbacks ?? [],
+  };
 }

 export function resolveAutoSupervisorConfig(): AutoSupervisorConfig {