port gsd2: blocked-models gate, milestone-summary classifier, unsupported-model recovery

blocked-models.ts (new):
  Persistent per-project blocklist at .sf/runtime/blocked-models.json.
  loadBlockedModels / isModelBlocked / blockModel (file-lock-safe write).

milestone-summary-classifier.ts (new):
  classifyMilestoneSummaryContent → "success" | "failure" | "unknown".
  isTerminalMilestoneSummaryContent: failure summaries are NOT terminal —
  lets auto-mode re-enter a milestone after a failed recovery summary.

state.ts:
  Phase 1 (completeMilestoneIds) and Phase 2 (registry) now check
  isTerminalMilestoneSummaryContent before treating a SUMMARY as complete.
  A failure SUMMARY no longer prematurely parks a milestone.
  Also raises CACHE_TTL_MS from 100 to 5000.

error-classifier.ts:
  Add "unsupported-model" ErrorClass kind with regex detection
  (model/deployment + not-supported/unavailable/no-access/not-authorized
  + account/plan/tier/subscription).
  Checked before "permanent" so /account/i in PERMANENT_RE doesn't swallow it;
  a rate-limit match still takes precedence over "unsupported-model".

auto-model-selection.ts:
  Wire isModelBlocked() gate in selectAndApplyModel candidate loop:
  skips provider-rejected models and continues to fallbacks.

bootstrap/agent-end-recovery.ts:
  Handle cls.kind === "unsupported-model": blockModel(), try fallback chain
  skipping already-blocked models, pause if no usable fallback.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-04-25 10:13:27 +02:00
parent 6cb6de4fd2
commit 5887ea3fd1
6 changed files with 273 additions and 25 deletions

View file

@ -19,6 +19,7 @@ import { logWarning } from "./workflow-logger.js";
import { resolveUokFlags } from "./uok/flags.js";
import { applyModelPolicyFilter } from "./uok/model-policy.js";
import { getRequiredWorkflowToolsForAutoUnit } from "./workflow-mcp.js";
import { isModelBlocked } from "./blocked-models.js";
/**
* Thrown when the model-policy gate rejects every candidate model for a unit
@ -446,6 +447,18 @@ export async function selectAndApplyModel(
attemptedPolicyEligible = true;
}
// Skip models the provider has previously rejected for this account
// (issue #4513). The block is persisted in .sf/runtime/blocked-models.json
// so it survives /sf auto restarts — without this, the same dead model
// gets reselected after every restart.
if (isModelBlocked(basePath, model.provider, model.id)) {
ctx.ui.notify(
`Skipping blocked model ${model.provider}/${model.id} (provider rejected it for this account).`,
"warning",
);
continue;
}
// Warn if the ID is ambiguous across providers
if (!modelId.includes("/")) {
const providers = availableModels.filter(m => m.id === modelId).map(m => m.provider);

View file

@ -0,0 +1,98 @@
// SF — Persistent per-project blocklist of provider/model pairs that the
// provider has rejected at request time for account entitlement reasons.
//
// Lives at `.sf/runtime/blocked-models.json` so the block survives /sf auto
// restarts. Auto-mode model selection skips blocked entries; agent-end
// recovery adds entries when a runtime rejection is classified as
// `unsupported-model`. See issue #4513.
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { sfRoot } from "./paths.js";
import { withFileLockSync } from "./file-lock.js";
/**
 * One record in the persistent blocklist: a provider/model pair that should be
 * skipped during model selection.
 */
export interface BlockedModelEntry {
/** Provider name. Entries are matched on a lower-cased `provider/id` key. */
provider: string;
/** Model identifier. Entries are matched on a lower-cased `provider/id` key. */
id: string;
/** Free-form explanation supplied by the caller of `blockModel`. */
reason: string;
/** `Date.now()` value (milliseconds since the Unix epoch) captured when the entry was added. */
blockedAt: number;
}
/** JSON document shape stored in `blocked-models.json`. */
interface BlockedModelsFile {
// Schema version tag; this module only ever produces the literal 1.
version: 1;
// Blocked provider/model pairs; new entries are appended at the end.
blocked: BlockedModelEntry[];
}
/**
 * Builds the location of the blocklist file for a project:
 * `<sfRoot(basePath)>/runtime/blocked-models.json`.
 */
function blockedModelsPath(basePath: string): string {
  const runtimeDir = join(sfRoot(basePath), "runtime");
  return join(runtimeDir, "blocked-models.json");
}
/**
 * Produces the case-insensitive comparison key for a provider/model pair:
 * both parts lower-cased and joined with a single "/".
 */
function modelKey(provider: string, id: string): string {
  const normalizedProvider = provider.toLowerCase();
  const normalizedId = id.toLowerCase();
  return normalizedProvider + "/" + normalizedId;
}
/**
 * Reads and validates the blocklist file at `path`.
 *
 * Never throws: a missing file, unreadable file, malformed JSON, or a document
 * without a `blocked` array all yield an empty blocklist, so a bad file never
 * blocks dispatch.
 *
 * Entries are kept only when `provider` and `id` are strings. For kept
 * entries, a missing or wrongly-typed `reason` becomes "" and a missing or
 * non-finite `blockedAt` becomes 0, so every returned element really satisfies
 * `BlockedModelEntry`. Any additional fields on an entry are passed through
 * untouched.
 *
 * @param path Absolute or relative path of the JSON file to read.
 * @returns A version-1 document containing only well-formed entries.
 */
function readFileSafe(path: string): BlockedModelsFile {
  const empty = (): BlockedModelsFile => ({ version: 1, blocked: [] });

  if (!existsSync(path)) return empty();

  try {
    const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
    if (typeof parsed !== "object" || parsed === null) return empty();

    const rawBlocked = (parsed as { blocked?: unknown }).blocked;
    if (!Array.isArray(rawBlocked)) return empty();

    const blocked: BlockedModelEntry[] = [];
    for (const raw of rawBlocked as unknown[]) {
      if (typeof raw !== "object" || raw === null) continue;
      const fields = raw as Record<string, unknown>;
      const { provider, id, reason, blockedAt } = fields;
      // provider + id are the identity of an entry; without them it is unusable.
      if (typeof provider !== "string" || typeof id !== "string") continue;
      blocked.push({
        ...fields,
        provider,
        id,
        reason: typeof reason === "string" ? reason : "",
        blockedAt:
          typeof blockedAt === "number" && Number.isFinite(blockedAt) ? blockedAt : 0,
      });
    }
    return { version: 1, blocked };
  } catch {
    // Corrupted JSON or read failure: treat as empty so a bad file never blocks dispatch.
    return empty();
  }
}
/**
 * Returns the blocked provider/model entries recorded for the project at
 * `basePath`. The list comes from `readFileSafe`, so a missing or corrupted
 * blocklist file yields an empty array rather than an error.
 */
export function loadBlockedModels(basePath: string): BlockedModelEntry[] {
  const filePath = blockedModelsPath(basePath);
  const { blocked } = readFileSafe(filePath);
  return blocked;
}
/**
 * Reports whether `provider`/`id` is present in the project's blocklist.
 *
 * Matching is case-insensitive on both parts. A missing or empty provider or
 * id is never considered blocked, and in that case the blocklist file is not
 * read at all.
 *
 * @param basePath Project base path used to locate the blocklist file.
 * @param provider Provider name, if known.
 * @param id Model identifier, if known.
 * @returns `true` when a matching entry exists, otherwise `false`.
 */
export function isModelBlocked(
  basePath: string,
  provider: string | undefined,
  id: string | undefined,
): boolean {
  if (provider && id) {
    const wanted = modelKey(provider, id);
    for (const entry of loadBlockedModels(basePath)) {
      if (modelKey(entry.provider, entry.id) === wanted) return true;
    }
  }
  return false;
}
/**
 * Adds `provider`/`id` to the project's persistent blocklist.
 *
 * The parent directory is created if needed. The read-modify-write cycle runs
 * inside `withFileLockSync`. If an entry with the same case-insensitive
 * `provider/id` key already exists the file is left untouched, so the original
 * `reason` and `blockedAt` are preserved.
 *
 * @param basePath Project base path used to locate the blocklist file.
 * @param provider Provider name of the rejected model.
 * @param id Identifier of the rejected model.
 * @param reason Free-form explanation stored with the entry.
 * @throws Any filesystem error other than the seed file already existing, and
 *   whatever `withFileLockSync` throws.
 */
export function blockModel(
  basePath: string,
  provider: string,
  id: string,
  reason: string,
): void {
  const path = blockedModelsPath(basePath);
  mkdirSync(dirname(path), { recursive: true });

  // Ensure the file exists before we try to lock it — proper-lockfile requires
  // the target path to exist (file-lock.ts falls through to an unlocked call
  // otherwise).
  //
  // The seed is written with the exclusive-create flag ("wx") instead of an
  // existsSync() check followed by a plain write: with the check-then-write
  // form, another process could create and populate the file in between and
  // this unlocked write would truncate its entries.
  try {
    writeFileSync(path, JSON.stringify({ version: 1, blocked: [] }, null, 2) + "\n", {
      encoding: "utf-8",
      flag: "wx",
    });
  } catch (err) {
    const code = err instanceof Error ? (err as NodeJS.ErrnoException).code : undefined;
    // EEXIST means the file is already there, which is exactly what we need.
    if (code !== "EEXIST") throw err;
  }

  withFileLockSync(path, () => {
    const current = readFileSafe(path);
    const target = modelKey(provider, id);
    // Already blocked: keep the existing record rather than rewriting it.
    if (current.blocked.some((e) => modelKey(e.provider, e.id) === target)) {
      return;
    }
    const next: BlockedModelsFile = {
      version: 1,
      blocked: [
        ...current.blocked,
        { provider, id, reason, blockedAt: Date.now() },
      ],
    };
    writeFileSync(path, JSON.stringify(next, null, 2) + "\n", "utf-8");
  });
}

View file

@ -1,6 +1,7 @@
import type { ExtensionAPI, ExtensionContext } from "@singularity-forge/pi-coding-agent";
import { logWarning } from "../workflow-logger.js";
import { blockModel, isModelBlocked } from "../blocked-models.js";
import { checkAutoStartAfterDiscuss } from "../guided-flow.js";
import { getAutoDashboardData, getAutoModeStartModel, isAutoActive, pauseAuto, setCurrentDispatchedModelId } from "../auto.js";
import { getNextFallbackModel, resolveModelWithFallbacksForUnit, resolvePersistModelChanges } from "../preferences.js";
@ -142,6 +143,61 @@ export async function handleAgentEnd(
}
}
// ── 1c. Unsupported-model: provider rejected this model for the current
// account/plan at request time (#4513). Persist a block so the
// same dead model isn't reselected on the next /sf auto restart,
// then try a fallback before pausing.
if (cls.kind === "unsupported-model") {
const dash = getAutoDashboardData();
const rejectedProvider = ctx.model?.provider;
const rejectedId = ctx.model?.id;
if (dash.basePath && rejectedProvider && rejectedId) {
try {
blockModel(dash.basePath, rejectedProvider, rejectedId, rawErrorMsg || "unsupported for account");
ctx.ui.notify(
`Blocked ${rejectedProvider}/${rejectedId} for this project — provider rejected it for the current account.`,
"warning",
);
} catch (err) {
const m = err instanceof Error ? err.message : String(err);
logWarning("bootstrap", `Failed to persist blocked model: ${m}`);
}
}
// Try configured fallback chain, skipping anything already blocked.
if (dash.currentUnit && dash.basePath) {
const modelConfig = resolveModelWithFallbacksForUnit(dash.currentUnit.type);
if (modelConfig && modelConfig.fallbacks.length > 0) {
const availableModels = ctx.modelRegistry.getAvailable();
let cursorModelId: string | undefined = ctx.model?.id;
while (true) {
const nextModelId = getNextFallbackModel(cursorModelId, modelConfig);
if (!nextModelId) break;
if (isModelBlocked(dash.basePath, ctx.model?.provider, nextModelId)) {
cursorModelId = nextModelId;
continue;
}
const modelToSet = resolveModelId(nextModelId, availableModels, ctx.model?.provider);
if (modelToSet && !isModelBlocked(dash.basePath, modelToSet.provider, modelToSet.id)) {
const persistModelChanges = resolvePersistModelChanges();
const ok = await pi.setModel(modelToSet, { persist: persistModelChanges });
if (ok) {
setCurrentDispatchedModelId({ provider: modelToSet.provider, id: modelToSet.id });
ctx.ui.notify(`Switched to unblocked fallback: ${nextModelId} and resuming.`, "info");
pi.sendMessage({ customType: "sf-auto-timeout-recovery", content: "Continue execution.", display: false }, { triggerTurn: true });
return;
}
}
cursorModelId = nextModelId;
}
}
}
// No usable fallback — pause
await pauseAutoForProviderError(pi, `Model unsupported for this account${errorDetail}`);
return;
}
// ── 2. Decide & Act ──────────────────────────────────────────────────
// --- Network errors: same-model retry with backoff ---

View file

@ -13,12 +13,13 @@
// ── ErrorClass discriminated union ──────────────────────────────────────────
/**
 * Result of classifying a provider/runtime error message, discriminated on
 * `kind`.
 *
 * Variants carrying `retryAfterMs` hold a delay in milliseconds — presumably
 * the wait before retrying; confirm against the consumers of this type.
 * "unsupported-model" means the provider rejected the model for the current
 * account/plan.
 *
 * NOTE(review): the first five members appear twice in this view. That looks
 * like diff-rendering residue (old vs. re-aligned new lines) rather than
 * intent — confirm against the actual file. Duplicate union members are
 * harmless to the type checker.
 */
export type ErrorClass =
| { kind: "network"; retryAfterMs: number }
| { kind: "rate-limit"; retryAfterMs: number }
| { kind: "server"; retryAfterMs: number }
| { kind: "stream"; retryAfterMs: number }
| { kind: "connection"; retryAfterMs: number }
| { kind: "network"; retryAfterMs: number }
| { kind: "rate-limit"; retryAfterMs: number }
| { kind: "server"; retryAfterMs: number }
| { kind: "stream"; retryAfterMs: number }
| { kind: "connection"; retryAfterMs: number }
| { kind: "model-error" }
| { kind: "unsupported-model" }
| { kind: "permanent" }
| { kind: "unknown" };
@ -45,6 +46,12 @@ export function resetRetryState(state: RetryState): void {
const PERMANENT_RE = /auth|unauthorized|forbidden|invalid.*key|invalid.*api|billing|quota exceeded|account/i;
// Include provider-specific quota-window phrasing like "hit your limit", "usage limit", "quota reached"
const RATE_LIMIT_RE = /rate.?limit|too many requests|429|hit your limit|usage limit|quota (?:reached|hit)|limit.*resets?/i;
// Unsupported-model: provider rejected the model for the current account/plan (#4513).
// Checked before `permanent` because PERMANENT_RE also matches /account/i.
// A message is treated as unsupported-model only when all three patterns match:
// it mentions a model/deployment, contains a "not supported / no access" style
// phrase, and names an account-level scope.
const UNSUPPORTED_MODEL_MODEL_RE = /\b(?:model|deployment)\b/i;
// The apostrophe class accepts both the ASCII apostrophe and the typographic
// one (U+2019), so "doesn't" and "doesn’t" are both recognised. It is written
// as an escape so the pattern survives tools that normalise quote characters.
const UNSUPPORTED_MODEL_INDICATOR_RE =
  /\bnot support(?:ed|s)?\b|\bunsupported\b|\bnot available\b|\bunavailable\b|\bno access\b|\bdoes(?:n['\u2019]t| not) (?:have access|support)\b|\bnot authori[sz]ed\b/i;
const UNSUPPORTED_MODEL_SCOPE_RE = /\b(?:account|plan|tier|subscription)\b/i;
// OpenRouter affordability-style quota errors should be treated as transient
// so core retry logic can lower maxTokens and continue in-session.
const AFFORDABILITY_RE = /requires more credits|can only afford|insufficient credits|not enough credits|fewer max_tokens/i;
@ -72,6 +79,19 @@ const RESET_DELAY_RE = /reset in (\d+)s/i;
export function classifyError(errorMsg: string, retryAfterMs?: number): ErrorClass {
const isPermanent = PERMANENT_RE.test(errorMsg);
const isRateLimit = RATE_LIMIT_RE.test(errorMsg) || AFFORDABILITY_RE.test(errorMsg);
const isUnsupportedModel =
UNSUPPORTED_MODEL_MODEL_RE.test(errorMsg) &&
UNSUPPORTED_MODEL_INDICATOR_RE.test(errorMsg) &&
UNSUPPORTED_MODEL_SCOPE_RE.test(errorMsg);
// 0. Unsupported model (account/plan entitlement rejection) — checked before
// `permanent` because PERMANENT_RE also matches /account/i and would
// otherwise swallow these errors, blocking the blocklist-driven fallback.
// Rate limit still wins when both patterns appear (a throttled account is
// not an entitlement failure).
if (isUnsupportedModel && !isRateLimit) {
return { kind: "unsupported-model" };
}
// 1. Permanent — but rate limit takes precedence
if (isPermanent && !isRateLimit) {

View file

@ -0,0 +1,42 @@
/**
* Shared milestone SUMMARY classifier.
*
* SUMMARY presence alone is not enough to prove milestone completion: recovery
* and blocker paths also write SUMMARY files. Keep this leaf module free of
* state/auto imports so state derivation, dispatch guards, and recovery can
* share one definition without cycles.
*/
import { splitFrontmatter, parseFrontmatterMap } from "../shared/frontmatter.js";
import { isClosedStatus } from "./status-guards.js";
/**
 * Outcome inferred from a milestone SUMMARY file:
 * - "success": the frontmatter `status` is a closed status.
 * - "failure": the frontmatter `status` is a known not-done value, or the
 *   content contains a failure marker.
 * - "unknown": neither signal is present.
 */
export type MilestoneSummaryOutcome = "success" | "failure" | "unknown";
/**
 * Classifies the text of a milestone SUMMARY file.
 *
 * The frontmatter `status` field (trimmed, lower-cased) is consulted first:
 * a closed status means "success"; one of the known not-done statuses means
 * "failure". If the status is absent or unrecognised, the whole content is
 * scanned for failure markers (a `# BLOCKER` heading, "auto-mode recovery
 * failed", "verification failed", or a status/verdict/outcome/result line
 * reading "not complete"). Anything else is "unknown".
 *
 * @param content Full text of the SUMMARY file.
 * @returns The inferred outcome.
 */
export function classifyMilestoneSummaryContent(content: string): MilestoneSummaryOutcome {
  const [frontmatterLines] = splitFrontmatter(content);
  const frontmatter = frontmatterLines ? parseFrontmatterMap(frontmatterLines) : null;
  const status =
    typeof frontmatter?.status === "string" ? frontmatter.status.trim().toLowerCase() : "";

  if (status !== "") {
    if (isClosedStatus(status)) return "success";
    switch (status) {
      case "active":
      case "pending":
      case "blocked":
      case "failed":
      case "failure":
      case "incomplete":
        return "failure";
      default:
        // Unrecognised status: fall back to scanning the body.
        break;
    }
  }

  const failureMarkers = [
    /(?:^|\n)\s*#\s*BLOCKER\b/i,
    /auto-mode recovery failed/i,
    /verification\s+failed/i,
    /(?:^|\n)\s*(?:status|verdict|outcome|result)\s*[:=-]\s*not complete\b/i,
  ];
  return failureMarkers.some((marker) => marker.test(content)) ? "failure" : "unknown";
}
/**
 * Terminal check used by state derivation, kept compatible with legacy files.
 *
 * Only a summary classified as "failure" is non-terminal. "unknown" summaries
 * still count as terminal so that older handwritten SUMMARY files keep marking
 * their milestone as done.
 *
 * @param content Full text of the SUMMARY file.
 * @returns `false` for explicit failure summaries, `true` otherwise.
 */
export function isTerminalMilestoneSummaryContent(content: string): boolean {
  const outcome = classifyMilestoneSummaryContent(content);
  const isFailure = outcome === "failure";
  return !isFailure;
}

View file

@ -37,6 +37,7 @@ import {
import { findMilestoneIds } from './milestone-ids.js';
import { loadQueueOrder, sortByQueueOrder } from './queue-order.js';
import { isClosedStatus, isDeferredStatus } from './status-guards.js';
import { isTerminalMilestoneSummaryContent } from './milestone-summary-classifier.js';
import { nativeBatchParseGsdFiles, type BatchParsedFile } from './native-parser-bridge.js';
import { join, resolve } from 'path';
@ -152,7 +153,7 @@ interface StateCache {
timestamp: number;
}
const CACHE_TTL_MS = 100;
const CACHE_TTL_MS = 5000;
let _stateCache: StateCache | null = null;
// ── Telemetry counters for derive-path observability ────────────────────────
@ -1137,20 +1138,29 @@ export async function _deriveStateImpl(basePath: string): Promise<SFState> {
const rc = rf ? await cachedLoadFile(rf) : null;
if (!rc) {
const sf = resolveMilestoneFile(basePath, mid, "SUMMARY");
if (sf) completeMilestoneIds.add(mid);
if (sf) {
const sc = await cachedLoadFile(sf);
if (!sc || isTerminalMilestoneSummaryContent(sc)) completeMilestoneIds.add(mid);
}
continue;
}
const rmap = parseRoadmap(rc);
roadmapCache.set(mid, rmap);
if (!isMilestoneComplete(rmap)) {
// Summary is the terminal artifact — if it exists, the milestone is
// Summary is the terminal artifact — if it exists and is terminal, the milestone is
// complete even when roadmap checkboxes weren't ticked (#864).
const sf = resolveMilestoneFile(basePath, mid, "SUMMARY");
if (sf) completeMilestoneIds.add(mid);
if (sf) {
const sc = await cachedLoadFile(sf);
if (!sc || isTerminalMilestoneSummaryContent(sc)) completeMilestoneIds.add(mid);
}
continue;
}
const sf = resolveMilestoneFile(basePath, mid, "SUMMARY");
if (sf) completeMilestoneIds.add(mid);
if (sf) {
const sc = await cachedLoadFile(sf);
if (!sc || isTerminalMilestoneSummaryContent(sc)) completeMilestoneIds.add(mid);
}
}
// Phase 2: Build registry using cached roadmaps (no re-parsing or re-reading)
@ -1174,16 +1184,19 @@ export async function _deriveStateImpl(basePath: string): Promise<SFState> {
const roadmap = roadmapCache.get(mid) ?? null;
if (!roadmap) {
// No roadmap — check if a summary exists (completed milestone without roadmap)
// No roadmap — check if a terminal summary exists (completed milestone without roadmap)
const summaryFile = resolveMilestoneFile(basePath, mid, "SUMMARY");
if (summaryFile) {
const summaryContent = await cachedLoadFile(summaryFile);
const summaryTitle = summaryContent
? (parseSummary(summaryContent).title || mid)
: mid;
registry.push({ id: mid, title: summaryTitle, status: 'complete' });
completeMilestoneIds.add(mid);
continue;
if (!summaryContent || isTerminalMilestoneSummaryContent(summaryContent)) {
const summaryTitle = summaryContent
? (parseSummary(summaryContent).title || mid)
: mid;
registry.push({ id: mid, title: summaryTitle, status: 'complete' });
completeMilestoneIds.add(mid);
continue;
}
// Failure summary — milestone is not yet done; fall through to active/pending logic
}
// Ghost milestone (only META.json, no CONTEXT/ROADMAP/SUMMARY) — skip entirely
if (isGhostMilestone(basePath, mid)) continue;
@ -1240,11 +1253,16 @@ export async function _deriveStateImpl(basePath: string): Promise<SFState> {
const needsRevalidation = !validationTerminal || verdict === 'needs-remediation';
if (summaryFile) {
// Summary exists → milestone is complete regardless of validation state.
// The summary is the terminal artifact (#864).
registry.push({ id: mid, title, status: 'complete' });
} else if (needsRevalidation && !activeMilestoneFound) {
// No summary and needs (re-)validation → validating-milestone
const summaryContent = await cachedLoadFile(summaryFile);
if (!summaryContent || isTerminalMilestoneSummaryContent(summaryContent)) {
// Terminal summary → milestone is complete. The summary is the terminal artifact (#864).
registry.push({ id: mid, title, status: 'complete' });
continue;
}
// Failure summary — fall through to re-validation / active logic below
}
if (needsRevalidation && !activeMilestoneFound) {
// No terminal summary and needs (re-)validation → validating-milestone
activeMilestone = { id: mid, title };
activeRoadmap = roadmap;
activeMilestoneFound = true;
@ -1262,10 +1280,11 @@ export async function _deriveStateImpl(basePath: string): Promise<SFState> {
registry.push({ id: mid, title, status: 'complete' });
}
} else {
// Roadmap slices not all checked — but if a summary exists, the milestone
// is still complete. The summary is the terminal artifact (#864).
// Roadmap slices not all checked — but if a terminal summary exists, the
// milestone is still complete. The summary is the terminal artifact (#864).
const summaryFile = resolveMilestoneFile(basePath, mid, "SUMMARY");
if (summaryFile) {
const summaryContent = summaryFile ? await cachedLoadFile(summaryFile) : null;
if (summaryFile && (!summaryContent || isTerminalMilestoneSummaryContent(summaryContent))) {
registry.push({ id: mid, title, status: 'complete' });
} else if (!activeMilestoneFound) {
// Check milestone-level dependencies before promoting to active.