sf snapshot: uncommitted changes after 30m inactivity

This commit is contained in:
Mikael Hugo 2026-05-16 17:30:35 +02:00
parent b5764af27b
commit effbb75f83
6 changed files with 529 additions and 62 deletions

View file

@ -801,15 +801,20 @@ if (cliFlags.maintain) {
const { computeBenchmarkCoverage, writeBenchmarkCoverage } = await import(
"./resources/extensions/sf/benchmark-coverage.js"
);
await runModelCatalogRefreshIfStale(process.cwd(), getKeyManagerAuthStorage());
const { runProviderQuotaRefreshIfStale } = await import(
"./resources/extensions/sf/provider-quota-cache.js"
);
const auth = getKeyManagerAuthStorage();
await runModelCatalogRefreshIfStale(process.cwd(), auth);
await runGeminiCatalogRefreshIfStale(process.cwd());
await runOpenaiCodexCatalogRefreshIfStale(process.cwd());
await runProviderQuotaRefreshIfStale(process.cwd(), auth);
const prefs = loadEffectiveSFPreferences()?.preferences ?? {};
const coverage = computeBenchmarkCoverage(prefs);
writeBenchmarkCoverage(coverage);
const ms = Date.now() - startedAt;
process.stdout.write(
`[sf --maintain] catalog refresh + coverage audit done in ${ms}ms — coverage ${coverage.summary.coveredCount}/${coverage.summary.total} (${coverage.uncovered.length} uncovered)\n`,
`[sf --maintain] catalog + quota refresh + coverage audit done in ${ms}ms — coverage ${coverage.summary.coveredCount}/${coverage.summary.total} (${coverage.uncovered.length} uncovered)\n`,
);
} catch (err) {
process.stderr.write(

View file

@ -1,23 +1,26 @@
/**
 * headless-usage.ts — `sf headless usage`
*
* Purpose: expose live LLM-provider usage data (account tier, project, per-model
* quota usage with reset windows) via the headless CLI so operators and CI can
* see capacity state without launching the interactive UI.
* Live LLM-provider subscription quota state for every provider with a
* documented introspection endpoint:
*
* Today this covers the gemini-cli provider (the most quota-sensitive surface
* because of AI Ultra's per-model windowed quotas). Other providers can be
* added by extending the snapshot helper as their introspection APIs are
* wired into dedicated provider packages.
* - kimi-coding GET https://api.kimi.com/coding/v1/usages
* - openrouter GET https://openrouter.ai/api/v1/credits
* - minimax GET https://api.minimax.io/v1/token_plan/remains
* - zai GET https://api.z.ai/api/monitor/usage/quota/limit
* - google-gemini-cli via snapshotGeminiCliAccount (OAuth Code Assist)
*
* Each call goes through `provider-quota-cache.js` which writes a unified
* representation to ~/.sf/provider-quota.json (15-minute TTL). This command
* forces a refresh, then prints either a compact human table or JSON.
*
* Providers without a documented quota endpoint (mistral, ollama-cloud,
* opencode, opencode-go, xiaomi) are listed as "unavailable" with a short
* note so users see exactly which subs SF can introspect today.
*
* Consumer: headless.ts when command === "usage".
*/
import {
type GeminiAccountSnapshot,
snapshotGeminiCliAccount,
} from "@singularity-forge/google-gemini-cli-provider";
export interface HandleUsageOptions {
json?: boolean;
}
@ -26,77 +29,99 @@ export interface HandleUsageResult {
exitCode: number;
}
/**
 * Subscription providers SF cannot introspect today — each entry carries a
 * short human-readable reason (no documented quota endpoint, or a
 * dashboard-only surface) that is surfaced verbatim in the usage output.
 */
const NO_API_PROVIDERS: ReadonlyArray<{ id: string; reason: string }> = [
  { id: "mistral", reason: "no public quota endpoint — console.mistral.ai" },
  { id: "ollama-cloud", reason: "WorkOS dashboard only — ollama.com/settings" },
  { id: "opencode", reason: "no public quota endpoint" },
  { id: "opencode-go", reason: "no public quota endpoint" },
  { id: "xiaomi", reason: "no public quota endpoint — platform.xiaomimimo.com" },
];
/**
* Render a snapshot as a compact text table (default) or as JSON for machine
* consumers. Always writes to stdout; never throws.
* Render the unified provider-quota snapshot as a compact text table (default)
* or as JSON for machine consumers. Always writes to stdout; never throws.
*/
export async function handleUsage(
cwd: string,
options: HandleUsageOptions = {},
): Promise<HandleUsageResult> {
let snapshot: GeminiAccountSnapshot | null;
const { runProviderQuotaRefreshIfStale, getAllProviderQuotaEntries } =
await import("./resources/extensions/sf/provider-quota-cache.js");
const { getKeyManagerAuthStorage } = await import(
"./resources/extensions/sf/key-manager.js"
);
const auth = getKeyManagerAuthStorage();
try {
snapshot = await snapshotGeminiCliAccount(cwd);
await runProviderQuotaRefreshIfStale(cwd, auth);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
const payload = {
provider: "google-gemini-cli",
ok: false,
error: msg,
};
process.stdout.write(
options.json ? `${JSON.stringify(payload)}\n` : `error: ${msg}\n`,
);
return { exitCode: 1 };
// Fall through to display whatever's cached, even on refresh failure.
if (!options.json) {
process.stderr.write(
`warning: quota refresh failed: ${err instanceof Error ? err.message : String(err)}\n`,
);
}
}
if (!snapshot) {
const payload = {
provider: "google-gemini-cli",
ok: false,
error:
"No gemini-cli account snapshot — run `gemini auth login` and verify ~/.gemini/oauth_creds.json exists.",
};
process.stdout.write(
options.json
? `${JSON.stringify(payload)}\n`
: `${payload.error}\n`,
);
return { exitCode: 1 };
}
const entries = getAllProviderQuotaEntries();
if (options.json) {
process.stdout.write(
`${JSON.stringify({ provider: "google-gemini-cli", ok: true, snapshot })}\n`,
);
const payload = {
ok: true,
providers: entries,
unavailable: NO_API_PROVIDERS,
};
process.stdout.write(`${JSON.stringify(payload)}\n`);
return { exitCode: 0 };
}
const lines: string[] = [];
lines.push("Gemini CLI usage");
lines.push("Provider quota state");
lines.push("");
lines.push(` project: ${snapshot.projectId}`);
if (snapshot.userTierId || snapshot.userTierName) {
const providerIds = Object.keys(entries).sort();
if (providerIds.length === 0) {
lines.push(
` userTier: ${snapshot.userTierId ?? "?"}${snapshot.userTierName ? ` (${snapshot.userTierName})` : ""}`,
" (no providers have a quota snapshot yet — check API keys are configured)",
);
}
if (snapshot.paidTier?.id || snapshot.paidTier?.name) {
lines.push(
` paidTier: ${snapshot.paidTier.id ?? "?"}${snapshot.paidTier.name ? `${snapshot.paidTier.name}` : ""}`,
for (const providerId of providerIds) {
const entry = entries[providerId];
if (!entry?.ok) {
lines.push(
` ${providerId.padEnd(20)} — error: ${entry?.error ?? "unknown"}`,
);
continue;
}
lines.push(` ${providerId} (fetched ${entry.fetchedAt})`);
const windows = entry.windows ?? [];
if (windows.length === 0) {
lines.push(" (no windows reported)");
continue;
}
const labelW = Math.max(
16,
...windows.map((w) => (w.label ?? "").length),
);
for (const w of windows) {
const pct =
typeof w.usedFraction === "number"
? `${(w.usedFraction * 100).toFixed(1).padStart(5)}%`
: " ? ";
const reset = w.resetHint ? ` reset=${w.resetHint}` : "";
lines.push(
` ${String(w.label ?? "").padEnd(labelW)} used=${pct}${reset}`,
);
}
}
lines.push("");
lines.push(" Per-model quota:");
const modelW = Math.max(
20,
...snapshot.models.map((m) => m.modelId.length),
);
for (const m of snapshot.models) {
const usedPct = (m.usedFraction * 100).toFixed(1).padStart(5);
const reset = m.resetTime ?? "-";
lines.push(` ${m.modelId.padEnd(modelW)} used=${usedPct}% reset=${reset}`);
lines.push("No public quota API for:");
for (const p of NO_API_PROVIDERS) {
lines.push(` ${p.id.padEnd(20)} ${p.reason}`);
}
process.stdout.write(`${lines.join("\n")}\n`);
return { exitCode: 0 };
}

View file

@ -201,7 +201,16 @@ export function detectRogueFileWrites(unitType, unitId, basePath) {
if (!summaryPath || !existsSync(summaryPath)) return [];
const dbRow = getTask(mid, sid, tid);
if (!dbRow || dbRow.status !== "complete") {
rogues.push({ path: summaryPath, unitType, unitId });
// Auto-remediate: SUMMARY exists on disk but DB is stale — sync DB to
// match filesystem instead of reporting as rogue (#3633).
// This handles the case where a commit landed without going through
// complete_task (e.g. batch commit from IDE, subagent, or direct git).
try {
updateTaskStatus(mid, sid, tid, "complete", new Date().toISOString());
} catch {
// If DB update fails, fall back to rogue detection so the issue is visible
rogues.push({ path: summaryPath, unitType, unitId });
}
}
} else if (unitType === "complete-slice") {
if (!mid || !sid) return [];

View file

@ -560,6 +560,24 @@ export function registerHooks(pi, ecosystemHandlers = []) {
} catch {
/* non-fatal — benchmark audit must never block session start */
}
// Refresh per-provider subscription quota state in the background.
// Today this covers the 5 providers with documented introspection
// endpoints (kimi-coding, openrouter, minimax, zai, google-gemini-cli);
// other providers are absent from the cache and consumers treat
// "no entry" as "unknown". TTL is 15 minutes — quota state changes
// much faster than catalogs, so we refresh more aggressively.
try {
const { scheduleProviderQuotaRefresh } = await import(
"../provider-quota-cache.js"
);
const { getKeyManagerAuthStorage } = await import("../key-manager.js");
scheduleProviderQuotaRefresh(
process.cwd(),
getKeyManagerAuthStorage(),
);
} catch {
/* non-fatal — quota refresh must never block session start */
}
// Detect drift in source-of-truth markdown files since last session.
try {
const { detectMdFileDrift, formatDriftReport } = await import(

View file

@ -0,0 +1,388 @@
/**
 * provider-quota-cache.js — fetch and cache subscription quota state per provider.
*
* For the five providers with a real quota introspection endpoint
* (kimi-coding, openrouter, minimax, zai, google-gemini-cli) this module
* fetches usage / remaining-quota data and caches it globally at
* ~/.sf/provider-quota.json with a 15-minute TTL.
*
* The other allowed providers (mistral, ollama-cloud, opencode, opencode-go,
* xiaomi) have no documented programmatic quota API today. They are simply
 * absent from the cache — consumers should treat "no entry" as "unknown" and
* fall back to local-dispatch counters or accept routing without quota signal.
*
* Designed for two consumers:
* - sf headless usage: display quota for all providers at once.
* - routing layer (phase 2): bias dispatch toward under-used subs to
* maximize subscription utilization ("spend the subs").
*/
import {
existsSync,
mkdirSync,
readFileSync,
writeFileSync,
} from "node:fs";
import { dirname, join } from "node:path";
import { sfHome } from "./sf-home.js";
// Quota state changes much faster than model catalogs, so the cache TTL is
// short (15 minutes).
const QUOTA_TTL_MS = 15 * 60 * 1000;
/** Providers that have a documented quota endpoint we can call with an API key. */
export const QUOTA_CAPABLE_PROVIDER_IDS = [
  "kimi-coding",
  "openrouter",
  "minimax",
  "zai",
  "google-gemini-cli",
];
/** Absolute path of the shared quota cache file under the SF home directory. */
function quotaFilePath() {
  const fileName = "provider-quota.json";
  return join(sfHome(), fileName);
}
/**
 * @typedef {object} ProviderQuotaWindow
 * @property {string} label — human-readable name ("Weekly", "5h rolling", "Monthly MCP", ...)
 * @property {number} used — tokens / requests / credits consumed
 * @property {number} limit — window cap
 * @property {number} [usedFraction] — derived used/limit, clamped 0..1
 * @property {string} [resetHint] — ISO timestamp or "N hours" depending on provider
 *
 * @typedef {object} ProviderQuotaEntry
 * @property {boolean} ok — true if the most recent fetch succeeded
 * @property {string} fetchedAt — ISO timestamp of the last successful fetch
 * @property {string} [error] — error message when ok=false
 * @property {ProviderQuotaWindow[]} windows — one or more rolling/period windows
 * @property {Record<string, unknown>} [raw] — raw API response for debugging
 */
/**
 * Derive used/limit as a fraction clamped to the [0, 1] range.
 * Returns undefined when either input is non-numeric or the limit is not
 * positive (so callers can render "unknown" instead of a bogus percentage).
 */
function clampFraction(used, limit) {
  const usable =
    typeof used === "number" && typeof limit === "number" && limit > 0;
  if (!usable) return undefined;
  const fraction = used / limit;
  if (fraction < 0) return 0;
  if (fraction > 1) return 1;
  return fraction;
}
// ─── Per-provider fetchers ───────────────────────────────────────────────────
/**
 * Kimi For Coding — GET https://api.kimi.com/coding/v1/usages with Bearer auth.
 * Shape (from MoonshotAI/kimi-cli usage.py):
 * { usage: {limit, used|remaining, name}, limits: [{detail:{...}, window:{duration, timeUnit}}] }
 *
 * Returns { windows, raw }; throws on any non-2xx HTTP status so the caller
 * records an ok=false entry.
 */
async function fetchKimiCodingQuota(apiKey) {
  const res = await fetch("https://api.kimi.com/coding/v1/usages", {
    method: "GET",
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  const payload = await res.json();
  const windows = [];
  // Top-level summary window (typically the overall plan limit).
  const summary = payload?.usage;
  if (summary && typeof summary === "object") {
    // NOTE(review): a missing `limit` defaults to 0, so a remaining-only
    // summary would yield a negative `used` (and an undefined usedFraction
    // because clampFraction rejects limit<=0) — confirm the API always
    // includes `limit` alongside `remaining`.
    const limit = typeof summary.limit === "number" ? summary.limit : 0;
    let used = typeof summary.used === "number" ? summary.used : undefined;
    if (used === undefined && typeof summary.remaining === "number") {
      used = limit - summary.remaining;
    }
    if (typeof used === "number") {
      windows.push({
        label: String(summary.name ?? "Weekly limit"),
        used,
        limit,
        usedFraction: clampFraction(used, limit),
      });
    }
  }
  // Additional per-window limits; each item may nest its numbers under
  // `detail` and describe its period via `window: {duration, timeUnit}`.
  if (Array.isArray(payload?.limits)) {
    for (const item of payload.limits) {
      const detail = item?.detail ?? item;
      const window = item?.window;
      const limit = typeof detail?.limit === "number" ? detail.limit : 0;
      let used = typeof detail?.used === "number" ? detail.used : undefined;
      if (used === undefined && typeof detail?.remaining === "number") {
        used = limit - detail.remaining;
      }
      if (typeof used !== "number") continue;
      // Label preference: explicit name, then "<duration> <timeUnit>", then a
      // generic fallback.
      const winLabel =
        window?.duration && window?.timeUnit
          ? `${window.duration} ${window.timeUnit}`
          : undefined;
      windows.push({
        label: String(detail?.name ?? winLabel ?? "rolling window"),
        used,
        limit,
        usedFraction: clampFraction(used, limit),
      });
    }
  }
  return { windows, raw: payload };
}
/**
 * OpenRouter — GET https://openrouter.ai/api/v1/credits with Bearer auth.
 * Returns { data: { total_credits, total_usage } } where both are USD floats.
 * Usage is measured against the user's purchased credits, not a rolling
 * window. Throws on any non-2xx HTTP status.
 */
async function fetchOpenrouterQuota(apiKey) {
  const response = await fetch("https://openrouter.ai/api/v1/credits", {
    method: "GET",
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!response.ok) throw new Error(`HTTP ${response.status}`);
  const payload = await response.json();
  const credits = Number(payload?.data?.total_credits ?? 0);
  const usage = Number(payload?.data?.total_usage ?? 0);
  const left = credits - usage;
  const window = {
    label: "credits (USD)",
    used: usage,
    // Fall back to `usage` as the cap so a zero-credit account never records
    // a 0 limit (avoid divide-by-zero downstream).
    limit: credits || usage,
    usedFraction: credits > 0 ? clampFraction(usage, credits) : undefined,
    resetHint: left >= 0 ? `${left.toFixed(2)} USD remaining` : undefined,
  };
  return { windows: [window], raw: payload };
}
/**
 * MiniMax — GET https://api.minimax.io/v1/token_plan/remains with Bearer auth.
 * The 5h rolling window is the load-bearing limit; response shape varies but
 * commonly: { remaining_tokens, total_tokens, reset_time, ... }.
 * Throws on any non-2xx HTTP status.
 */
async function fetchMinimaxQuota(apiKey) {
  const response = await fetch("https://api.minimax.io/v1/token_plan/remains", {
    method: "GET",
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!response.ok) throw new Error(`HTTP ${response.status}`);
  const payload = await response.json();
  // Accept both long and short field names seen across API versions.
  const firstNumber = (...candidates) =>
    candidates.find((value) => typeof value === "number");
  const remaining = firstNumber(payload?.remaining_tokens, payload?.remain);
  const total = firstNumber(payload?.total_tokens, payload?.total);
  const windows = [];
  if (remaining !== undefined && total !== undefined) {
    const used = total - remaining;
    windows.push({
      label: "token plan",
      used,
      limit: total,
      usedFraction: clampFraction(used, total),
      resetHint: payload?.reset_time ? String(payload.reset_time) : undefined,
    });
  }
  return { windows, raw: payload };
}
/**
 * Z.AI — GET https://api.z.ai/api/monitor/usage/quota/limit with Bearer auth.
 * Returns 5h token limit + MCP monthly quota per the opencode-mystatus tool.
 * Field names vary; we accept common synonyms. Throws on non-2xx status.
 */
async function fetchZaiQuota(apiKey) {
  const response = await fetch("https://api.z.ai/api/monitor/usage/quota/limit", {
    method: "GET",
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!response.ok) throw new Error(`HTTP ${response.status}`);
  const payload = await response.json();
  // The bucket list may live under .data, under .limits, or be the root array.
  let buckets = [];
  if (Array.isArray(payload?.data)) buckets = payload.data;
  else if (Array.isArray(payload?.limits)) buckets = payload.limits;
  else if (Array.isArray(payload)) buckets = payload;
  const asNumber = (value) => (typeof value === "number" ? value : undefined);
  const windows = [];
  for (const bucket of buckets) {
    if (!bucket || typeof bucket !== "object") continue;
    // Synonyms: limit/total for the cap, used/consumed for consumption, and
    // remaining as a derived fallback when only the cap is known.
    const limit = asNumber(bucket.limit) ?? asNumber(bucket.total);
    let used = asNumber(bucket.used) ?? asNumber(bucket.consumed);
    if (used === undefined && limit !== undefined) {
      const remaining = asNumber(bucket.remaining);
      if (remaining !== undefined) used = limit - remaining;
    }
    if (used === undefined || limit === undefined) continue;
    const resetHint = bucket.reset_time
      ? String(bucket.reset_time)
      : bucket.resetAt
        ? String(bucket.resetAt)
        : undefined;
    windows.push({
      label: String(bucket.name ?? bucket.type ?? "quota"),
      used,
      limit,
      usedFraction: clampFraction(used, limit),
      resetHint,
    });
  }
  return { windows, raw: payload };
}
/**
 * Google Gemini CLI — reuses snapshotGeminiCliAccount from the dedicated
 * provider package. Maps the per-model quota windows into the same shape as
 * the other providers' entries; throws when no account snapshot exists.
 */
async function fetchGeminiCliQuota(basePath) {
  const { snapshotGeminiCliAccount } = await import(
    "@singularity-forge/google-gemini-cli-provider"
  );
  const snapshot = await snapshotGeminiCliAccount(basePath);
  if (!snapshot) throw new Error("no snapshot — gemini-cli not signed in");
  const models = snapshot.models ?? [];
  const windows = models.map((model) => {
    const fraction =
      typeof model.usedFraction === "number" ? model.usedFraction : undefined;
    return {
      label: model.modelId,
      // The snapshot already reports a fraction, so used/limit are a nominal
      // 1/1 and usedFraction carries the real signal.
      used: 1,
      limit: 1,
      usedFraction: fraction,
      resetHint: model.resetTime ? String(model.resetTime) : undefined,
    };
  });
  return { windows, raw: snapshot };
}
// API-key-based fetchers keyed by provider id. google-gemini-cli is handled
// separately in runProviderQuotaRefreshIfStale because it needs a base path
// and an OAuth snapshot rather than an API key.
const FETCHERS = {
  "kimi-coding": fetchKimiCodingQuota,
  openrouter: fetchOpenrouterQuota,
  minimax: fetchMinimaxQuota,
  zai: fetchZaiQuota,
};
// ─── Cache I/O ───────────────────────────────────────────────────────────────
/**
 * Read the cache file from disk. Returns an empty v1 cache when the file is
 * missing, unparsable, or carries an unexpected schema version.
 */
function loadCache() {
  const emptyCache = () => ({ schemaVersion: 1, providers: {} });
  const path = quotaFilePath();
  if (!existsSync(path)) return emptyCache();
  try {
    const parsed = JSON.parse(readFileSync(path, "utf-8"));
    const valid = parsed?.schemaVersion === 1 && parsed?.providers;
    return valid ? parsed : emptyCache();
  } catch {
    return emptyCache();
  }
}
/** Best-effort write of the cache file; I/O failures are swallowed. */
function saveCache(cache) {
  const path = quotaFilePath();
  try {
    mkdirSync(dirname(path), { recursive: true });
    const body = `${JSON.stringify(cache, null, 2)}\n`;
    writeFileSync(path, body, "utf-8");
  } catch {
    // Best-effort.
  }
}
/** True when the entry has a fetchedAt timestamp within the TTL window. */
function isFresh(entry) {
  const fetchedAt = entry?.fetchedAt;
  if (!fetchedAt) return false;
  const ageMs = Date.now() - new Date(fetchedAt).getTime();
  return ageMs <= QUOTA_TTL_MS;
}
// ─── Public API ──────────────────────────────────────────────────────────────
/**
 * Read the cached quota entry for a provider.
 * Returns null if no entry exists or the entry is older than QUOTA_TTL_MS.
 *
 * @param {string} providerId
 * @returns {ProviderQuotaEntry | null}
 */
export function getProviderQuotaState(providerId) {
  const entry = loadCache().providers?.[providerId];
  return entry && isFresh(entry) ? entry : null;
}
/**
 * Read every cached quota entry — fresh or stale — for display purposes.
 * @returns {Record<string, ProviderQuotaEntry>}
 */
export function getAllProviderQuotaEntries() {
  const cache = loadCache();
  return cache.providers ?? {};
}
/**
 * Awaitable refresh for all quota-capable providers that have a key in `auth`.
 * Skips fresh entries (within TTL). Per-provider failures are recorded as
 * { ok: false, error } but never thrown.
 *
 * On failure (fetch error or missing key) the previous entry's windows are
 * preserved in the ok=false record, so consumers that deliberately fall back
 * to cached data (e.g. `sf headless usage`) keep the last-known-good numbers
 * instead of an empty list.
 *
 * @param {string} basePath passed through to gemini-cli snapshot helper
 * @param {{ getCredentialsForProvider: (id: string) => Array<{type: string, key?: string}> }} auth
 */
export async function runProviderQuotaRefreshIfStale(basePath, auth) {
  const cache = loadCache();
  cache.providers = cache.providers ?? {};
  for (const providerId of QUOTA_CAPABLE_PROVIDER_IDS) {
    const previous = cache.providers[providerId];
    if (isFresh(previous)) continue;
    const now = new Date().toISOString();
    // Failure entry that retains the last fetched windows for display fallback.
    const failed = (error) => ({
      ok: false,
      fetchedAt: now,
      error,
      windows: previous?.windows ?? [],
    });
    try {
      let result;
      if (providerId === "google-gemini-cli") {
        // OAuth-based: needs the workspace path, not an API key.
        result = await fetchGeminiCliQuota(basePath);
      } else {
        const fetcher = FETCHERS[providerId];
        if (!fetcher) continue;
        const creds = auth?.getCredentialsForProvider?.(providerId) ?? [];
        const apiKey = creds.find((c) => c.type === "api_key" && c.key)?.key;
        if (!apiKey) {
          cache.providers[providerId] = failed("no api key configured");
          continue;
        }
        result = await fetcher(apiKey);
      }
      cache.providers[providerId] = {
        ok: true,
        fetchedAt: now,
        windows: result.windows ?? [],
        raw: result.raw,
      };
    } catch (err) {
      cache.providers[providerId] = failed(
        err instanceof Error ? err.message : String(err),
      );
    }
  }
  saveCache(cache);
}
/**
 * Fire-and-forget background refresh — safe to call at session_start.
 * Never throws, never blocks.
 */
export function scheduleProviderQuotaRefresh(basePath, auth) {
  setImmediate(() => {
    // Swallow any rejection: a background refresh must never surface as an
    // unhandled promise rejection (which can crash the host process), even
    // though runProviderQuotaRefreshIfStale is designed not to throw.
    runProviderQuotaRefreshIfStale(basePath, auth).catch(() => {});
  });
}

View file

@ -26,6 +26,7 @@ import {
formatMemoriesForPrompt,
getActiveMemoriesRanked,
} from "../memory-store.js";
import { debugLog } from "../debug-logger.js";
import { AgentSwarm } from "./agent-swarm.js";
import { MessageBus } from "./message-bus.js";
import { createDefaultSwarm } from "./swarm-roles.js";
@ -407,13 +408,29 @@ export class SwarmDispatchLayer {
}
// Step 1: Enqueue via existing _busDispatch
debugLog("swarm-dispatch", {
phase: "before-busDispatch",
unitType: envelope.unitType,
unitId: envelope.unitId,
});
const dispatchResult = await this._busDispatch(envelope);
debugLog("swarm-dispatch", {
phase: "after-busDispatch",
unitType: envelope.unitType,
unitId: envelope.unitId,
targetAgent: dispatchResult.targetAgent,
messageId: dispatchResult.messageId,
});
// Step 2: Look up the target agent in the swarm
const swarm = await this.getOrCreateSwarm();
const agent = swarm.get(dispatchResult.targetAgent);
if (!agent) {
debugLog("swarm-dispatch", {
phase: "agent-not-found",
targetAgent: dispatchResult.targetAgent,
});
return {
...dispatchResult,
reply: null,
@ -421,6 +438,11 @@ export class SwarmDispatchLayer {
error: `dispatchAndWait: target agent "${dispatchResult.targetAgent}" not found in swarm`,
};
}
debugLog("swarm-dispatch", {
phase: "before-runAgentTurn",
targetAgent: dispatchResult.targetAgent,
messageId: dispatchResult.messageId,
});
// Step 3: Drive one inbox-processing turn via runAgentTurn (in-process).
// Pass onlyMessageId so runAgentTurn forces an inbox refresh (catching