port gsd2: blocked-models gate, milestone-summary classifier, unsupported-model recovery

blocked-models.ts (new): Persistent per-project blocklist at .sf/runtime/blocked-models.json. loadBlockedModels / isModelBlocked / blockModel (file-lock-safe write). milestone-summary-classifier.ts (new): classifyMilestoneSummaryContent → "success" | "failure" | "unknown". isTerminalMilestoneSummaryContent: failure summaries are NOT terminal — lets auto-mode re-enter a milestone after a failed recovery summary. state.ts: Phase 1 (completeMilestoneIds) and Phase 2 (registry) now check isTerminalMilestoneSummaryContent before treating a SUMMARY as complete. A failure SUMMARY no longer prematurely parks a milestone. error-classifier.ts: Add "unsupported-model" ErrorClass kind with regex detection (model + not-supported/unavailable/no-access + account/plan/tier). Checked before "permanent" so /account/i in PERMANENT_RE doesn't swallow it. auto-model-selection.ts: Wire isModelBlocked() gate in selectAndApplyModel candidate loop: skips provider-rejected models and continues to fallbacks. bootstrap/agent-end-recovery.ts: Handle cls.kind === "unsupported-model": blockModel(), try fallback chain skipping already-blocked models, pause if no usable fallback. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 10:13:27 +02:00 · 2026-04-25 10:13:27 +02:00 · 5887ea3fd1
commit 5887ea3fd1
parent 6cb6de4fd2
6 changed files with 273 additions and 25 deletions
--- a/src/resources/extensions/sf/auto-model-selection.ts
+++ b/src/resources/extensions/sf/auto-model-selection.ts
@ -19,6 +19,7 @@ import { logWarning } from "./workflow-logger.js";
 import { resolveUokFlags } from "./uok/flags.js";
 import { applyModelPolicyFilter } from "./uok/model-policy.js";
 import { getRequiredWorkflowToolsForAutoUnit } from "./workflow-mcp.js";
+import { isModelBlocked } from "./blocked-models.js";

 /**
 * Thrown when the model-policy gate rejects every candidate model for a unit
@ -446,6 +447,18 @@ export async function selectAndApplyModel(
        attemptedPolicyEligible = true;
      }

+      // Skip models the provider has previously rejected for this account
+      // (issue #4513).  The block is persisted in .sf/runtime/blocked-models.json
+      // so it survives /sf auto restarts — without this, the same dead model
+      // gets reselected after every restart.
+      if (isModelBlocked(basePath, model.provider, model.id)) {
+        ctx.ui.notify(
+          `Skipping blocked model ${model.provider}/${model.id} (provider rejected it for this account).`,
+          "warning",
+        );
+        continue;
+      }
+
      // Warn if the ID is ambiguous across providers
      if (!modelId.includes("/")) {
        const providers = availableModels.filter(m => m.id === modelId).map(m => m.provider);
--- a/src/resources/extensions/sf/blocked-models.ts
+++ b/src/resources/extensions/sf/blocked-models.ts
@ -0,0 +1,98 @@
+// SF — Persistent per-project blocklist of provider/model pairs that the
+// provider has rejected at request time for account entitlement reasons.
+//
+// Lives at `.sf/runtime/blocked-models.json` so the block survives /sf auto
+// restarts.  Auto-mode model selection skips blocked entries; agent-end
+// recovery adds entries when a runtime rejection is classified as
+// `unsupported-model`.  See issue #4513.
+
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { dirname, join } from "node:path";
+import { sfRoot } from "./paths.js";
+import { withFileLockSync } from "./file-lock.js";
+
+/**
+ * One persisted blocklist record: a provider/model pair that was rejected at
+ * request time.
+ */
+export interface BlockedModelEntry {
+  /** Provider name; lower-cased before comparison when entries are matched. */
+  provider: string;
+  /** Model id; lower-cased before comparison when entries are matched. */
+  id: string;
+  /** Free-form explanation supplied by the caller when the block was added. */
+  reason: string;
+  /** When the block was recorded; `blockModel` stores `Date.now()` here. */
+  blockedAt: number;
+}
+
+/** On-disk shape of the blocklist JSON file. */
+interface BlockedModelsFile {
+  /** Schema version; this module only ever produces `1`. */
+  version: 1;
+  /** The blocked provider/model records. */
+  blocked: BlockedModelEntry[];
+}
+
+/** Resolves the blocklist file location under the project's SF root. */
+function blockedModelsPath(basePath: string): string {
+  const runtimeDir = join(sfRoot(basePath), "runtime");
+  return join(runtimeDir, "blocked-models.json");
+}
+
+/** Builds the lower-cased `provider/id` key used to compare blocklist entries. */
+function modelKey(provider: string, id: string): string {
+  const parts = [provider, id].map((part) => part.toLowerCase());
+  return parts.join("/");
+}
+
+/**
+ * Reads and validates the blocklist file without ever throwing.
+ *
+ * A missing, unreadable, or malformed file yields an empty blocklist so that a
+ * bad file never blocks dispatch. Entries lacking a string `provider` or `id`
+ * are dropped. Entries that identify a model but lack a usable `reason` or
+ * `blockedAt` (for example after a manual edit) are kept, because the block
+ * should still apply, with those fields defaulted to `""` and `0` so that
+ * every returned entry genuinely satisfies `BlockedModelEntry`.
+ *
+ * @param path Location of the blocklist JSON file.
+ * @returns The normalized file contents; `blocked` is empty on any failure.
+ */
+function readFileSafe(path: string): BlockedModelsFile {
+  const empty = (): BlockedModelsFile => ({ version: 1, blocked: [] });
+  if (!existsSync(path)) return empty();
+  try {
+    // Parse to `unknown` and narrow step by step instead of trusting a cast.
+    const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
+    if (typeof parsed !== "object" || parsed === null) return empty();
+    const rawBlocked = (parsed as { blocked?: unknown }).blocked;
+    if (!Array.isArray(rawBlocked)) return empty();
+
+    const blocked: BlockedModelEntry[] = [];
+    for (const item of rawBlocked as unknown[]) {
+      if (typeof item !== "object" || item === null) continue;
+      const { provider, id, reason, blockedAt } = item as Record<string, unknown>;
+      // provider + id are the identity of a block; without them the entry is unusable.
+      if (typeof provider !== "string" || typeof id !== "string") continue;
+      blocked.push({
+        provider,
+        id,
+        reason: typeof reason === "string" ? reason : "",
+        blockedAt: typeof blockedAt === "number" ? blockedAt : 0,
+      });
+    }
+    return { version: 1, blocked };
+  } catch {
+    // Unreadable file or corrupted JSON: treat as empty so a bad file never blocks dispatch.
+    return empty();
+  }
+}
+
+/**
+ * Returns the entries currently stored in the project's blocklist file.
+ * The file is read again on every call.
+ */
+export function loadBlockedModels(basePath: string): BlockedModelEntry[] {
+  const { blocked } = readFileSafe(blockedModelsPath(basePath));
+  return blocked;
+}
+
+/**
+ * Reports whether the given provider/model pair is on the project's blocklist.
+ * Keys are compared after lower-casing; a missing provider or id is never
+ * considered blocked.
+ */
+export function isModelBlocked(
+  basePath: string,
+  provider: string | undefined,
+  id: string | undefined,
+): boolean {
+  if (provider && id) {
+    const wanted = modelKey(provider, id);
+    for (const entry of loadBlockedModels(basePath)) {
+      if (modelKey(entry.provider, entry.id) === wanted) return true;
+    }
+  }
+  return false;
+}
+
+/**
+ * Adds a provider/model pair to the project's persistent blocklist.
+ *
+ * Creates the runtime directory and the blocklist file when needed, then
+ * performs the read-modify-write under `withFileLockSync`. Blocking a pair that
+ * is already listed (compared case-insensitively) is a no-op.
+ *
+ * @param basePath Project base path used to locate the SF root.
+ * @param provider Provider name of the rejected model.
+ * @param id Model id of the rejected model.
+ * @param reason Explanation stored alongside the entry.
+ * @throws Propagates filesystem errors other than the file already existing
+ *   during seeding (callers are expected to handle or log them).
+ */
+export function blockModel(
+  basePath: string,
+  provider: string,
+  id: string,
+  reason: string,
+): void {
+  const path = blockedModelsPath(basePath);
+  mkdirSync(dirname(path), { recursive: true });
+  // The lock target must exist before it is locked. Seed it with an
+  // exclusive create ("wx") rather than an exists-check followed by a plain
+  // write: that sequence runs outside the lock, so a process that saw "missing"
+  // could truncate the file after another process had already stored an entry.
+  try {
+    writeFileSync(path, JSON.stringify({ version: 1, blocked: [] }, null, 2) + "\n", {
+      encoding: "utf-8",
+      flag: "wx",
+    });
+  } catch (err) {
+    const code = err instanceof Error ? (err as { code?: unknown }).code : undefined;
+    // EEXIST means the file is already there, which is exactly what we need.
+    if (code !== "EEXIST") throw err;
+  }
+  withFileLockSync(path, () => {
+    const current = readFileSafe(path);
+    const target = modelKey(provider, id);
+    // Already blocked: leave the original entry (and its timestamp) untouched.
+    if (current.blocked.some((e) => modelKey(e.provider, e.id) === target)) {
+      return;
+    }
+    const next: BlockedModelsFile = {
+      version: 1,
+      blocked: [
+        ...current.blocked,
+        { provider, id, reason, blockedAt: Date.now() },
+      ],
+    };
+    writeFileSync(path, JSON.stringify(next, null, 2) + "\n", "utf-8");
+  });
+}
--- a/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts
+++ b/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts
@ -1,6 +1,7 @@
 import type { ExtensionAPI, ExtensionContext } from "@singularity-forge/pi-coding-agent";

 import { logWarning } from "../workflow-logger.js";
+import { blockModel, isModelBlocked } from "../blocked-models.js";
 import { checkAutoStartAfterDiscuss } from "../guided-flow.js";
 import { getAutoDashboardData, getAutoModeStartModel, isAutoActive, pauseAuto, setCurrentDispatchedModelId } from "../auto.js";
 import { getNextFallbackModel, resolveModelWithFallbacksForUnit, resolvePersistModelChanges } from "../preferences.js";
@ -142,6 +143,61 @@ export async function handleAgentEnd(
      }
    }

+    // ── 1c. Unsupported-model: provider rejected this model for the current
+    //        account/plan at request time (#4513).  Persist a block so the
+    //        same dead model isn't reselected on the next /sf auto restart,
+    //        then try a fallback before pausing.
+    if (cls.kind === "unsupported-model") {
+      const dash = getAutoDashboardData();
+      const rejectedProvider = ctx.model?.provider;
+      const rejectedId = ctx.model?.id;
+      if (dash.basePath && rejectedProvider && rejectedId) {
+        try {
+          blockModel(dash.basePath, rejectedProvider, rejectedId, rawErrorMsg || "unsupported for account");
+          ctx.ui.notify(
+            `Blocked ${rejectedProvider}/${rejectedId} for this project — provider rejected it for the current account.`,
+            "warning",
+          );
+        } catch (err) {
+          const m = err instanceof Error ? err.message : String(err);
+          logWarning("bootstrap", `Failed to persist blocked model: ${m}`);
+        }
+      }
+
+      // Try configured fallback chain, skipping anything already blocked.
+      if (dash.currentUnit && dash.basePath) {
+        const modelConfig = resolveModelWithFallbacksForUnit(dash.currentUnit.type);
+        if (modelConfig && modelConfig.fallbacks.length > 0) {
+          const availableModels = ctx.modelRegistry.getAvailable();
+          let cursorModelId: string | undefined = ctx.model?.id;
+          while (true) {
+            const nextModelId = getNextFallbackModel(cursorModelId, modelConfig);
+            if (!nextModelId) break;
+            if (isModelBlocked(dash.basePath, ctx.model?.provider, nextModelId)) {
+              cursorModelId = nextModelId;
+              continue;
+            }
+            const modelToSet = resolveModelId(nextModelId, availableModels, ctx.model?.provider);
+            if (modelToSet && !isModelBlocked(dash.basePath, modelToSet.provider, modelToSet.id)) {
+              const persistModelChanges = resolvePersistModelChanges();
+              const ok = await pi.setModel(modelToSet, { persist: persistModelChanges });
+              if (ok) {
+                setCurrentDispatchedModelId({ provider: modelToSet.provider, id: modelToSet.id });
+                ctx.ui.notify(`Switched to unblocked fallback: ${nextModelId} and resuming.`, "info");
+                pi.sendMessage({ customType: "sf-auto-timeout-recovery", content: "Continue execution.", display: false }, { triggerTurn: true });
+                return;
+              }
+            }
+            cursorModelId = nextModelId;
+          }
+        }
+      }
+
+      // No usable fallback — pause
+      await pauseAutoForProviderError(pi, `Model unsupported for this account${errorDetail}`);
+      return;
+    }
+
    // ── 2. Decide & Act ──────────────────────────────────────────────────

    // --- Network errors: same-model retry with backoff ---
--- a/src/resources/extensions/sf/error-classifier.ts
+++ b/src/resources/extensions/sf/error-classifier.ts
@ -13,12 +13,13 @@
 // ── ErrorClass discriminated union ──────────────────────────────────────────

 export type ErrorClass =
-  | { kind: "network";      retryAfterMs: number }
-  | { kind: "rate-limit";   retryAfterMs: number }
-  | { kind: "server";       retryAfterMs: number }
-  | { kind: "stream";       retryAfterMs: number }
-  | { kind: "connection";   retryAfterMs: number }
+  | { kind: "network";          retryAfterMs: number }
+  | { kind: "rate-limit";       retryAfterMs: number }
+  | { kind: "server";           retryAfterMs: number }
+  | { kind: "stream";           retryAfterMs: number }
+  | { kind: "connection";       retryAfterMs: number }
  | { kind: "model-error" }
+  | { kind: "unsupported-model" }
  | { kind: "permanent" }
  | { kind: "unknown" };

@ -45,6 +46,12 @@ export function resetRetryState(state: RetryState): void {
 const PERMANENT_RE = /auth|unauthorized|forbidden|invalid.*key|invalid.*api|billing|quota exceeded|account/i;
 // Include provider-specific quota-window phrasing like "hit your limit", "usage limit", "quota reached"
 const RATE_LIMIT_RE = /rate.?limit|too many requests|429|hit your limit|usage limit|quota (?:reached|hit)|limit.*resets?/i;
+// Unsupported-model: provider rejected the model for the current account/plan (#4513).
+// Checked before `permanent` because PERMANENT_RE also matches /account/i.
+// classifyError requires all three patterns below to match the same message:
+// a model/deployment mention, a "not supported / no access" indicator, and an
+// account-scope word.
+const UNSUPPORTED_MODEL_MODEL_RE = /\b(?:model|deployment)\b/i;
+// The "doesn't" contraction accepts both the ASCII apostrophe and the
+// typographic one (U+2019), since messages may contain either.
+const UNSUPPORTED_MODEL_INDICATOR_RE =
+  /\bnot support(?:ed|s)?\b|\bunsupported\b|\bnot available\b|\bunavailable\b|\bno access\b|\bdoes(?:n['\u2019]t| not) (?:have access|support)\b|\bnot authori[sz]ed\b/i;
+const UNSUPPORTED_MODEL_SCOPE_RE = /\b(?:account|plan|tier|subscription)\b/i;
 // OpenRouter affordability-style quota errors should be treated as transient
 // so core retry logic can lower maxTokens and continue in-session.
 const AFFORDABILITY_RE = /requires more credits|can only afford|insufficient credits|not enough credits|fewer max_tokens/i;
@ -72,6 +79,19 @@ const RESET_DELAY_RE = /reset in (\d+)s/i;
 export function classifyError(errorMsg: string, retryAfterMs?: number): ErrorClass {
  const isPermanent = PERMANENT_RE.test(errorMsg);
  const isRateLimit = RATE_LIMIT_RE.test(errorMsg) || AFFORDABILITY_RE.test(errorMsg);
+  const isUnsupportedModel =
+    UNSUPPORTED_MODEL_MODEL_RE.test(errorMsg) &&
+    UNSUPPORTED_MODEL_INDICATOR_RE.test(errorMsg) &&
+    UNSUPPORTED_MODEL_SCOPE_RE.test(errorMsg);
+
+  // 0. Unsupported model (account/plan entitlement rejection) — checked before
+  //    `permanent` because PERMANENT_RE also matches /account/i and would
+  //    otherwise swallow these errors, blocking the blocklist-driven fallback.
+  //    Rate limit still wins when both patterns appear (a throttled account is
+  //    not an entitlement failure).
+  if (isUnsupportedModel && !isRateLimit) {
+    return { kind: "unsupported-model" };
+  }

  // 1. Permanent — but rate limit takes precedence
  if (isPermanent && !isRateLimit) {
--- a/src/resources/extensions/sf/milestone-summary-classifier.ts
+++ b/src/resources/extensions/sf/milestone-summary-classifier.ts
@ -0,0 +1,42 @@
+/**
+ * Shared milestone SUMMARY classifier.
+ *
+ * SUMMARY presence alone is not enough to prove milestone completion: recovery
+ * and blocker paths also write SUMMARY files. Keep this leaf module free of
+ * state/auto imports so state derivation, dispatch guards, and recovery can
+ * share one definition without cycles.
+ */
+
+import { splitFrontmatter, parseFrontmatterMap } from "../shared/frontmatter.js";
+import { isClosedStatus } from "./status-guards.js";
+
+/**
+ * Result of classifying a milestone SUMMARY: "success" for a closed status,
+ * "failure" for a not-finished status or a failure marker in the text, and
+ * "unknown" when neither signal is present.
+ */
+export type MilestoneSummaryOutcome = "success" | "failure" | "unknown";
+
+/**
+ * Classifies milestone SUMMARY text as a success, a failure, or unknown.
+ *
+ * The frontmatter `status` is consulted first: a closed status means success
+ * and one of the known not-finished statuses means failure. Any other case
+ * falls back to scanning the whole text for failure markers; when none is
+ * found the outcome is "unknown".
+ */
+export function classifyMilestoneSummaryContent(content: string): MilestoneSummaryOutcome {
+  const [frontmatter] = splitFrontmatter(content);
+  const fields = frontmatter ? parseFrontmatterMap(frontmatter) : null;
+  const statusField = fields?.status;
+  const status = typeof statusField === "string" ? statusField.trim().toLowerCase() : "";
+
+  if (status !== "") {
+    if (isClosedStatus(status)) return "success";
+    switch (status) {
+      case "active":
+      case "pending":
+      case "blocked":
+      case "failed":
+      case "failure":
+      case "incomplete":
+        return "failure";
+    }
+  }
+
+  // Body-level markers written by recovery and blocker paths.
+  const failureMarkers = [
+    /(?:^|\n)\s*#\s*BLOCKER\b/i,
+    /auto-mode recovery failed/i,
+    /verification\s+failed/i,
+    /(?:^|\n)\s*(?:status|verdict|outcome|result)\s*[:=-]\s*not complete\b/i,
+  ];
+  return failureMarkers.some((marker) => marker.test(content)) ? "failure" : "unknown";
+}
+
+/**
+ * Terminal check used by state derivation, kept compatible with legacy files.
+ * A SUMMARY counts as terminal when it classifies as "success" or "unknown"
+ * (so old handwritten SUMMARY files still close their milestone); only an
+ * explicit failure summary is non-terminal.
+ */
+export function isTerminalMilestoneSummaryContent(content: string): boolean {
+  const outcome = classifyMilestoneSummaryContent(content);
+  return outcome === "success" || outcome === "unknown";
+}
--- a/src/resources/extensions/sf/state.ts
+++ b/src/resources/extensions/sf/state.ts
@ -37,6 +37,7 @@ import {
 import { findMilestoneIds } from './milestone-ids.js';
 import { loadQueueOrder, sortByQueueOrder } from './queue-order.js';
 import { isClosedStatus, isDeferredStatus } from './status-guards.js';
+import { isTerminalMilestoneSummaryContent } from './milestone-summary-classifier.js';
 import { nativeBatchParseGsdFiles, type BatchParsedFile } from './native-parser-bridge.js';

 import { join, resolve } from 'path';
@ -152,7 +153,7 @@ interface StateCache {
  timestamp: number;
 }

-const CACHE_TTL_MS = 100;
+const CACHE_TTL_MS = 5000;
 let _stateCache: StateCache | null = null;

 // ── Telemetry counters for derive-path observability ────────────────────────
@ -1137,20 +1138,29 @@ export async function _deriveStateImpl(basePath: string): Promise<SFState> {
    const rc = rf ? await cachedLoadFile(rf) : null;
    if (!rc) {
      const sf = resolveMilestoneFile(basePath, mid, "SUMMARY");
-      if (sf) completeMilestoneIds.add(mid);
+      if (sf) {
+        const sc = await cachedLoadFile(sf);
+        if (!sc || isTerminalMilestoneSummaryContent(sc)) completeMilestoneIds.add(mid);
+      }
      continue;
    }
    const rmap = parseRoadmap(rc);
    roadmapCache.set(mid, rmap);
    if (!isMilestoneComplete(rmap)) {
-      // Summary is the terminal artifact — if it exists, the milestone is
+      // Summary is the terminal artifact — if it exists and is terminal, the milestone is
      // complete even when roadmap checkboxes weren't ticked (#864).
      const sf = resolveMilestoneFile(basePath, mid, "SUMMARY");
-      if (sf) completeMilestoneIds.add(mid);
+      if (sf) {
+        const sc = await cachedLoadFile(sf);
+        if (!sc || isTerminalMilestoneSummaryContent(sc)) completeMilestoneIds.add(mid);
+      }
      continue;
    }
    const sf = resolveMilestoneFile(basePath, mid, "SUMMARY");
-    if (sf) completeMilestoneIds.add(mid);
+    if (sf) {
+      const sc = await cachedLoadFile(sf);
+      if (!sc || isTerminalMilestoneSummaryContent(sc)) completeMilestoneIds.add(mid);
+    }
  }

  // Phase 2: Build registry using cached roadmaps (no re-parsing or re-reading)
@ -1174,16 +1184,19 @@ export async function _deriveStateImpl(basePath: string): Promise<SFState> {
    const roadmap = roadmapCache.get(mid) ?? null;

    if (!roadmap) {
-      // No roadmap — check if a summary exists (completed milestone without roadmap)
+      // No roadmap — check if a terminal summary exists (completed milestone without roadmap)
      const summaryFile = resolveMilestoneFile(basePath, mid, "SUMMARY");
      if (summaryFile) {
        const summaryContent = await cachedLoadFile(summaryFile);
-        const summaryTitle = summaryContent
-          ? (parseSummary(summaryContent).title || mid)
-          : mid;
-        registry.push({ id: mid, title: summaryTitle, status: 'complete' });
-        completeMilestoneIds.add(mid);
-        continue;
+        if (!summaryContent || isTerminalMilestoneSummaryContent(summaryContent)) {
+          const summaryTitle = summaryContent
+            ? (parseSummary(summaryContent).title || mid)
+            : mid;
+          registry.push({ id: mid, title: summaryTitle, status: 'complete' });
+          completeMilestoneIds.add(mid);
+          continue;
+        }
+        // Failure summary — milestone is not yet done; fall through to active/pending logic
      }
      // Ghost milestone (only META.json, no CONTEXT/ROADMAP/SUMMARY) — skip entirely
      if (isGhostMilestone(basePath, mid)) continue;
@ -1240,11 +1253,16 @@ export async function _deriveStateImpl(basePath: string): Promise<SFState> {
      const needsRevalidation = !validationTerminal || verdict === 'needs-remediation';

      if (summaryFile) {
-        // Summary exists → milestone is complete regardless of validation state.
-        // The summary is the terminal artifact (#864).
-        registry.push({ id: mid, title, status: 'complete' });
-      } else if (needsRevalidation && !activeMilestoneFound) {
-        // No summary and needs (re-)validation → validating-milestone
+        const summaryContent = await cachedLoadFile(summaryFile);
+        if (!summaryContent || isTerminalMilestoneSummaryContent(summaryContent)) {
+          // Terminal summary → milestone is complete. The summary is the terminal artifact (#864).
+          registry.push({ id: mid, title, status: 'complete' });
+          continue;
+        }
+        // Failure summary — fall through to re-validation / active logic below
+      }
+      if (needsRevalidation && !activeMilestoneFound) {
+        // No terminal summary and needs (re-)validation → validating-milestone
        activeMilestone = { id: mid, title };
        activeRoadmap = roadmap;
        activeMilestoneFound = true;
@ -1262,10 +1280,11 @@ export async function _deriveStateImpl(basePath: string): Promise<SFState> {
        registry.push({ id: mid, title, status: 'complete' });
      }
    } else {
-      // Roadmap slices not all checked — but if a summary exists, the milestone
-      // is still complete. The summary is the terminal artifact (#864).
+      // Roadmap slices not all checked — but if a terminal summary exists, the
+      // milestone is still complete. The summary is the terminal artifact (#864).
      const summaryFile = resolveMilestoneFile(basePath, mid, "SUMMARY");
-      if (summaryFile) {
+      const summaryContent = summaryFile ? await cachedLoadFile(summaryFile) : null;
+      if (summaryFile && (!summaryContent || isTerminalMilestoneSummaryContent(summaryContent))) {
        registry.push({ id: mid, title, status: 'complete' });
      } else if (!activeMilestoneFound) {
        // Check milestone-level dependencies before promoting to active.