singularity-forge/src/headless-usage.ts
Mikael Hugo c0d089f9ca feat(catalog/quota): global model catalog, benchmark coverage audit, provider quota visibility
Phase-1 work shipped together since prior auto-snapshots split it across
several commits. This commit captures the leftover type declarations,
the new provider-quota-cache test suite, and the last register-hooks /
cli wiring.

Highlights now in tree:

- Model catalog moved from per-project to global `~/.sf/model-catalog/`
  via `sfHome()` (one cache shared by all repos; no more 9-dir
  duplication).

- `benchmark-coverage.js` audits the dispatchable model set against
  `learning/data/model-benchmarks.json` at session_start, writes
  `~/.sf/benchmark-coverage.json`, notifies on change.

- `provider-quota-cache.js` introduces phase-1 subscription quota
  visibility for the 5 providers with documented APIs:
  kimi-coding (/coding/v1/usages), openrouter (/api/v1/credits),
  minimax (/v1/token_plan/remains), zai (/api/monitor/usage/quota/limit),
  google-gemini-cli (existing snapshotGeminiCliAccount). 15-min TTL,
  global cache.

- `sf --maintain` CLI flag refreshes catalogs + quotas + coverage audit
  in one idempotent pass. Daemon spawns it every 6h.

- `sf headless usage` rewritten to display all providers from the
  unified cache, with explicit "no public API" notes for mistral,
  ollama-cloud, opencode, opencode-go, xiaomi.

- Awaitable `runXIfStale` variants for model-catalog, gemini-catalog,
  openai-codex-catalog (the schedule* variants now wrap them in
  setImmediate).

- TypeScript declarations added for the new JS modules so the
  dist-redirect pipeline type-checks cleanly.

Phase 2 (quota-aware routing in benchmark-selector) is filed as SF
self-feedback for the backlog.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 17:37:20 +02:00

131 lines
4 KiB
TypeScript

/**
* headless-usage.ts — `sf headless usage`
*
* Live LLM-provider subscription quota state for every provider with a
* documented introspection endpoint:
*
* - kimi-coding GET https://api.kimi.com/coding/v1/usages
* - openrouter GET https://openrouter.ai/api/v1/credits
* - minimax GET https://api.minimax.io/v1/token_plan/remains
* - zai GET https://api.z.ai/api/monitor/usage/quota/limit
* - google-gemini-cli via snapshotGeminiCliAccount (OAuth Code Assist)
*
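* Each call goes through `provider-quota-cache.js`, which writes a unified
* representation to ~/.sf/provider-quota.json (15-minute TTL). This command
* requests a refresh via `runProviderQuotaRefreshIfStale` (if the refresh
* fails, whatever is already cached is shown instead), then prints either a
* compact human table or JSON.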
*
* Providers without a documented quota endpoint (mistral, ollama-cloud,
* opencode, opencode-go, xiaomi) are listed as "unavailable" with a short
* note so users see exactly which subs SF can introspect today.
*
* Consumer: headless.ts when command === "usage".
*/
/** Output options accepted by `handleUsage`. */
export interface HandleUsageOptions {
  /** When truthy, the snapshot is printed as JSON instead of the text table. */
  json?: boolean;
}
/** Value resolved by `handleUsage` once the report has been written. */
export interface HandleUsageResult {
  /** Exit code the caller should report for the command. */
  exitCode: number;
}
/**
 * Providers reported as having no quota introspection, each paired with the
 * short reason that is shown to the user (text mode) or emitted under the
 * `unavailable` key (JSON mode).
 */
const NO_API_PROVIDERS: readonly { id: string; reason: string }[] = [
  {
    id: "mistral",
    reason: "no public quota endpoint — console.mistral.ai",
  },
  {
    id: "ollama-cloud",
    reason: "WorkOS dashboard only — ollama.com/settings",
  },
  {
    id: "opencode",
    reason: "no public quota endpoint",
  },
  {
    id: "opencode-go",
    reason: "no public quota endpoint",
  },
  {
    id: "xiaomi",
    reason: "no public quota endpoint — platform.xiaomimimo.com",
  },
];
/**
 * Print the unified provider-quota snapshot for `sf headless usage`.
 *
 * Requests a refresh through `runProviderQuotaRefreshIfStale`, then writes
 * either a compact text table (default) or a single-line JSON document to
 * stdout. A refresh failure is never fatal: whatever is cached is still
 * displayed, and the failure is reported as a warning on stderr (text mode)
 * or as a `refreshError` string in the payload (JSON mode). The
 * `refreshError` key is only present when the refresh failed, so the payload
 * of a successful run is unchanged.
 *
 * Errors raised while loading the quota-cache / key-manager modules, or while
 * reading the cached entries, are not caught here and propagate to the caller.
 *
 * @param cwd - Working directory forwarded to the quota refresh.
 * @param options - Output options; `json: true` selects the JSON payload.
 * @returns `{ exitCode: 0 }` once the report has been written.
 */
export async function handleUsage(
  cwd: string,
  options: HandleUsageOptions = {},
): Promise<HandleUsageResult> {
  const { runProviderQuotaRefreshIfStale, getAllProviderQuotaEntries } =
    await import("./resources/extensions/sf/provider-quota-cache.js");
  const { getKeyManagerAuthStorage } = await import(
    "./resources/extensions/sf/key-manager.js"
  );
  const auth = getKeyManagerAuthStorage();

  // Remember the failure so BOTH output modes can surface it; previously the
  // JSON path dropped it silently while still reporting `ok: true`.
  let refreshError: string | undefined;
  try {
    await runProviderQuotaRefreshIfStale(cwd, auth);
  } catch (err) {
    // Fall through to display whatever's cached, even on refresh failure.
    refreshError = err instanceof Error ? err.message : String(err);
    if (!options.json) {
      process.stderr.write(`warning: quota refresh failed: ${refreshError}\n`);
    }
  }

  const entries = getAllProviderQuotaEntries();

  if (options.json) {
    const payload = {
      ok: true,
      providers: entries,
      unavailable: NO_API_PROVIDERS,
      // Only added on failure so existing consumers see an identical payload
      // on the success path.
      ...(refreshError !== undefined ? { refreshError } : {}),
    };
    process.stdout.write(`${JSON.stringify(payload)}\n`);
    return { exitCode: 0 };
  }

  // A rendered percentage is `padStart(5)` plus "%", i.e. 6 columns; the
  // unknown marker uses the same width so the `reset=` column stays aligned.
  const unknownPct = "?".padStart(6);

  const lines: string[] = ["Provider quota state", ""];
  const providerIds = Object.keys(entries).sort();
  if (providerIds.length === 0) {
    lines.push(
      " (no providers have a quota snapshot yet — check API keys are configured)",
    );
  }

  for (const providerId of providerIds) {
    const entry = entries[providerId];
    if (!entry?.ok) {
      lines.push(
        ` ${providerId.padEnd(20)} — error: ${entry?.error ?? "unknown"}`,
      );
      continue;
    }

    lines.push(` ${providerId} (fetched ${entry.fetchedAt})`);
    const windows = (entry.windows ?? []) as Array<{
      label?: string;
      usedFraction?: number;
      resetHint?: string;
    }>;
    if (windows.length === 0) {
      lines.push(" (no windows reported)");
      continue;
    }

    // Label column is at least 16 wide and grows to fit the longest label.
    const labelW = Math.max(
      16,
      ...windows.map((w) => (w.label ?? "").length),
    );
    for (const w of windows) {
      // NaN / Infinity are `typeof "number"` too; treat them as unknown
      // rather than printing "NaN%".
      const pct =
        typeof w.usedFraction === "number" && Number.isFinite(w.usedFraction)
          ? `${(w.usedFraction * 100).toFixed(1).padStart(5)}%`
          : unknownPct;
      const reset = w.resetHint ? ` reset=${w.resetHint}` : "";
      lines.push(
        ` ${String(w.label ?? "").padEnd(labelW)} used=${pct}${reset}`,
      );
    }
  }

  lines.push("");
  lines.push("No public quota API for:");
  for (const p of NO_API_PROVIDERS) {
    lines.push(` ${p.id.padEnd(20)} ${p.reason}`);
  }

  process.stdout.write(`${lines.join("\n")}\n`);
  return { exitCode: 0 };
}