Phase-1 work is shipped together here because prior auto-snapshots split it across several commits. This commit captures the leftover type declarations, the new provider-quota-cache test suite, and the last register-hooks / cli wiring.

Highlights now in tree:

- Model catalog moved from per-project to global `~/.sf/model-catalog/` via `sfHome()` (one cache shared by all repos; no more 9-dir duplication).
- `benchmark-coverage.js` audits the dispatchable model set against `learning/data/model-benchmarks.json` at session_start, writes `~/.sf/benchmark-coverage.json`, and notifies on change.
- `provider-quota-cache.js` introduces phase-1 subscription quota visibility for the 5 providers with documented APIs: kimi-coding (/coding/v1/usages), openrouter (/api/v1/credits), minimax (/v1/token_plan/remains), zai (/api/monitor/usage/quota/limit), google-gemini-cli (existing snapshotGeminiCliAccount). 15-min TTL, global cache.
- `sf --maintain` CLI flag refreshes catalogs + quotas + coverage audit in one idempotent pass. The daemon spawns it every 6h.
- `sf headless usage` rewritten to display all providers from the unified cache, with explicit "no public API" notes for mistral, ollama-cloud, opencode, opencode-go, xiaomi.
- Awaitable `runXIfStale` variants for model-catalog, gemini-catalog, openai-codex-catalog (the schedule* variants now wrap them in setImmediate).
- TypeScript declarations added for the new JS modules so the dist-redirect pipeline type-checks cleanly.

Phase 2 (quota-aware routing in benchmark-selector) is filed as SF self-feedback for the backlog.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
131 lines
4 KiB
TypeScript
131 lines
4 KiB
TypeScript
/**
 * headless-usage.ts — `sf headless usage`
 *
 * Live LLM-provider subscription quota state for every provider with a
 * documented introspection endpoint:
 *
 *   - kimi-coding        GET https://api.kimi.com/coding/v1/usages
 *   - openrouter         GET https://openrouter.ai/api/v1/credits
 *   - minimax            GET https://api.minimax.io/v1/token_plan/remains
 *   - zai                GET https://api.z.ai/api/monitor/usage/quota/limit
 *   - google-gemini-cli  via snapshotGeminiCliAccount (OAuth Code Assist)
 *
 * Each call goes through `provider-quota-cache.js`, which writes a unified
 * representation to ~/.sf/provider-quota.json (15-minute TTL). This command
 * triggers a refresh through `runProviderQuotaRefreshIfStale`, then prints
 * either a compact human table or JSON.
 *
 * Providers without a documented quota endpoint (mistral, ollama-cloud,
 * opencode, opencode-go, xiaomi) are listed as "unavailable" with a short
 * note so users see exactly which subscriptions SF can introspect today.
 *
 * Consumer: headless.ts when command === "usage".
 */
|
|
|
|
/** Options accepted by {@link handleUsage}. */
export interface HandleUsageOptions {
  /**
   * When true, emit a single JSON document on stdout instead of the
   * human-readable text table. Defaults to the text table when omitted.
   */
  json?: boolean;
}
|
|
|
|
/** Outcome of {@link handleUsage}, reported back to the headless dispatcher. */
export interface HandleUsageResult {
  /** Process exit code the caller should use for this command. */
  exitCode: number;
}
|
|
|
|
/**
 * Providers that are listed in the output but cannot be introspected.
 * Each entry carries the provider id and a short, user-facing reason that is
 * printed verbatim in the text table and included as-is in the JSON payload.
 */
const NO_API_PROVIDERS: ReadonlyArray<{ id: string; reason: string }> = [
  { id: "mistral", reason: "no public quota endpoint — console.mistral.ai" },
  { id: "ollama-cloud", reason: "WorkOS dashboard only — ollama.com/settings" },
  { id: "opencode", reason: "no public quota endpoint" },
  { id: "opencode-go", reason: "no public quota endpoint" },
  { id: "xiaomi", reason: "no public quota endpoint — platform.xiaomimimo.com" },
];
|
|
|
|
/** The fields of a quota window that the text renderer reads. */
interface UsageWindowRow {
  label?: string;
  usedFraction?: number;
  resetHint?: string;
}

/**
 * Format a used-fraction (0..1) as a right-aligned percentage cell.
 * Anything that is not a finite number — including NaN and ±Infinity, which
 * would otherwise print as "NaN%" / "Infinity%" — renders as the "?" cell.
 */
function formatUsedPercent(usedFraction: unknown): string {
  if (typeof usedFraction !== "number" || !Number.isFinite(usedFraction)) {
    return " ? ";
  }
  return `${(usedFraction * 100).toFixed(1).padStart(5)}%`;
}

/**
 * Build one text line per quota window, padding labels to a shared width
 * (minimum 16 columns) so the `used=` column lines up within a provider.
 */
function formatWindowLines(windows: ReadonlyArray<UsageWindowRow>): string[] {
  // Coerce each label exactly once so the width calculation and the rendered
  // text always agree, even if a provider reports a non-string label.
  const rows = windows.map((w) => ({ label: String(w.label ?? ""), window: w }));
  const labelW = Math.max(16, ...rows.map((row) => row.label.length));

  return rows.map(({ label, window }) => {
    const pct = formatUsedPercent(window.usedFraction);
    const reset = window.resetHint ? ` reset=${window.resetHint}` : "";
    return ` ${label.padEnd(labelW)} used=${pct}${reset}`;
  });
}

/**
 * Render the unified provider-quota snapshot as a compact text table (default)
 * or as JSON for machine consumers. Output always goes to stdout.
 *
 * A failing quota refresh is tolerated: the command falls through and prints
 * whatever `getAllProviderQuotaEntries()` returns. In text mode the failure is
 * reported as a warning on stderr; in JSON mode it is reported through an
 * additional `refreshError` string in the payload (absent on success).
 *
 * @param cwd - Working directory, passed through to the quota refresh.
 * @param options - Output options; see {@link HandleUsageOptions}.
 * @returns A result whose `exitCode` is 0 once output has been written.
 * @throws If loading the quota-cache or key-manager modules, obtaining the
 *   auth storage, or reading the cached entries fails — those calls are not
 *   guarded, so the returned promise rejects.
 */
export async function handleUsage(
  cwd: string,
  options: HandleUsageOptions = {},
): Promise<HandleUsageResult> {
  const { runProviderQuotaRefreshIfStale, getAllProviderQuotaEntries } =
    await import("./resources/extensions/sf/provider-quota-cache.js");
  const { getKeyManagerAuthStorage } = await import(
    "./resources/extensions/sf/key-manager.js"
  );

  const auth = getKeyManagerAuthStorage();
  let refreshError: string | undefined;
  try {
    await runProviderQuotaRefreshIfStale(cwd, auth);
  } catch (err) {
    // Fall through to display whatever's cached, even on refresh failure.
    refreshError = err instanceof Error ? err.message : String(err);
    if (!options.json) {
      process.stderr.write(`warning: quota refresh failed: ${refreshError}\n`);
    }
  }

  const entries = getAllProviderQuotaEntries();

  if (options.json) {
    const payload = {
      ok: true,
      providers: entries,
      unavailable: NO_API_PROVIDERS,
      // Only present when the refresh failed, so the success payload keeps
      // exactly the same shape as before.
      ...(refreshError === undefined ? {} : { refreshError }),
    };
    process.stdout.write(`${JSON.stringify(payload)}\n`);
    return { exitCode: 0 };
  }

  const lines: string[] = ["Provider quota state", ""];

  // Sorted so the table order is stable from run to run.
  const providerIds = Object.keys(entries).sort();
  if (providerIds.length === 0) {
    lines.push(
      " (no providers have a quota snapshot yet — check API keys are configured)",
    );
  }

  for (const providerId of providerIds) {
    const entry = entries[providerId];
    if (!entry?.ok) {
      lines.push(
        ` ${providerId.padEnd(20)} — error: ${entry?.error ?? "unknown"}`,
      );
      continue;
    }

    lines.push(` ${providerId} (fetched ${entry.fetchedAt})`);

    const windows = (entry.windows ?? []) as Array<UsageWindowRow>;
    if (windows.length === 0) {
      lines.push(" (no windows reported)");
      continue;
    }
    lines.push(...formatWindowLines(windows));
  }

  lines.push("");
  lines.push("No public quota API for:");
  for (const p of NO_API_PROVIDERS) {
    lines.push(` ${p.id.padEnd(20)} ${p.reason}`);
  }

  process.stdout.write(`${lines.join("\n")}\n`);
  return { exitCode: 0 };
}
|