sf snapshot: uncommitted changes after 30m inactivity
This commit is contained in:
parent
b5764af27b
commit
effbb75f83
6 changed files with 529 additions and 62 deletions
|
|
@ -801,15 +801,20 @@ if (cliFlags.maintain) {
|
|||
const { computeBenchmarkCoverage, writeBenchmarkCoverage } = await import(
|
||||
"./resources/extensions/sf/benchmark-coverage.js"
|
||||
);
|
||||
await runModelCatalogRefreshIfStale(process.cwd(), getKeyManagerAuthStorage());
|
||||
const { runProviderQuotaRefreshIfStale } = await import(
|
||||
"./resources/extensions/sf/provider-quota-cache.js"
|
||||
);
|
||||
const auth = getKeyManagerAuthStorage();
|
||||
await runModelCatalogRefreshIfStale(process.cwd(), auth);
|
||||
await runGeminiCatalogRefreshIfStale(process.cwd());
|
||||
await runOpenaiCodexCatalogRefreshIfStale(process.cwd());
|
||||
await runProviderQuotaRefreshIfStale(process.cwd(), auth);
|
||||
const prefs = loadEffectiveSFPreferences()?.preferences ?? {};
|
||||
const coverage = computeBenchmarkCoverage(prefs);
|
||||
writeBenchmarkCoverage(coverage);
|
||||
const ms = Date.now() - startedAt;
|
||||
process.stdout.write(
|
||||
`[sf --maintain] catalog refresh + coverage audit done in ${ms}ms — coverage ${coverage.summary.coveredCount}/${coverage.summary.total} (${coverage.uncovered.length} uncovered)\n`,
|
||||
`[sf --maintain] catalog + quota refresh + coverage audit done in ${ms}ms — coverage ${coverage.summary.coveredCount}/${coverage.summary.total} (${coverage.uncovered.length} uncovered)\n`,
|
||||
);
|
||||
} catch (err) {
|
||||
process.stderr.write(
|
||||
|
|
|
|||
|
|
@ -1,23 +1,26 @@
|
|||
/**
|
||||
* headless-usage.ts — `sf headless usage`
|
||||
*
|
||||
* Purpose: expose live LLM-provider usage data (account tier, project, per-model
|
||||
* quota usage with reset windows) via the headless CLI so operators and CI can
|
||||
* see capacity state without launching the interactive UI.
|
||||
* Live LLM-provider subscription quota state for every provider with a
|
||||
* documented introspection endpoint:
|
||||
*
|
||||
* Today this covers the gemini-cli provider (the most quota-sensitive surface
|
||||
* because of AI Ultra's per-model windowed quotas). Other providers can be
|
||||
* added by extending the snapshot helper as their introspection APIs are
|
||||
* wired into dedicated provider packages.
|
||||
* - kimi-coding GET https://api.kimi.com/coding/v1/usages
|
||||
* - openrouter GET https://openrouter.ai/api/v1/credits
|
||||
* - minimax GET https://api.minimax.io/v1/token_plan/remains
|
||||
* - zai GET https://api.z.ai/api/monitor/usage/quota/limit
|
||||
* - google-gemini-cli via snapshotGeminiCliAccount (OAuth Code Assist)
|
||||
*
|
||||
* Each call goes through `provider-quota-cache.js` which writes a unified
|
||||
* representation to ~/.sf/provider-quota.json (15-minute TTL). This command
|
||||
* forces a refresh, then prints either a compact human table or JSON.
|
||||
*
|
||||
* Providers without a documented quota endpoint (mistral, ollama-cloud,
|
||||
* opencode, opencode-go, xiaomi) are listed as "unavailable" with a short
|
||||
* note so users see exactly which subs SF can introspect today.
|
||||
*
|
||||
* Consumer: headless.ts when command === "usage".
|
||||
*/
|
||||
|
||||
import {
|
||||
type GeminiAccountSnapshot,
|
||||
snapshotGeminiCliAccount,
|
||||
} from "@singularity-forge/google-gemini-cli-provider";
|
||||
|
||||
export interface HandleUsageOptions {
|
||||
json?: boolean;
|
||||
}
|
||||
|
|
@ -26,77 +29,99 @@ export interface HandleUsageResult {
|
|||
exitCode: number;
|
||||
}
|
||||
|
||||
const NO_API_PROVIDERS: ReadonlyArray<{ id: string; reason: string }> = [
|
||||
{ id: "mistral", reason: "no public quota endpoint — console.mistral.ai" },
|
||||
{ id: "ollama-cloud", reason: "WorkOS dashboard only — ollama.com/settings" },
|
||||
{ id: "opencode", reason: "no public quota endpoint" },
|
||||
{ id: "opencode-go", reason: "no public quota endpoint" },
|
||||
{ id: "xiaomi", reason: "no public quota endpoint — platform.xiaomimimo.com" },
|
||||
];
|
||||
|
||||
/**
|
||||
* Render a snapshot as a compact text table (default) or as JSON for machine
|
||||
* consumers. Always writes to stdout; never throws.
|
||||
* Render the unified provider-quota snapshot as a compact text table (default)
|
||||
* or as JSON for machine consumers. Always writes to stdout; never throws.
|
||||
*/
|
||||
export async function handleUsage(
|
||||
cwd: string,
|
||||
options: HandleUsageOptions = {},
|
||||
): Promise<HandleUsageResult> {
|
||||
let snapshot: GeminiAccountSnapshot | null;
|
||||
const { runProviderQuotaRefreshIfStale, getAllProviderQuotaEntries } =
|
||||
await import("./resources/extensions/sf/provider-quota-cache.js");
|
||||
const { getKeyManagerAuthStorage } = await import(
|
||||
"./resources/extensions/sf/key-manager.js"
|
||||
);
|
||||
|
||||
const auth = getKeyManagerAuthStorage();
|
||||
try {
|
||||
snapshot = await snapshotGeminiCliAccount(cwd);
|
||||
await runProviderQuotaRefreshIfStale(cwd, auth);
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
const payload = {
|
||||
provider: "google-gemini-cli",
|
||||
ok: false,
|
||||
error: msg,
|
||||
};
|
||||
process.stdout.write(
|
||||
options.json ? `${JSON.stringify(payload)}\n` : `error: ${msg}\n`,
|
||||
);
|
||||
return { exitCode: 1 };
|
||||
// Fall through to display whatever's cached, even on refresh failure.
|
||||
if (!options.json) {
|
||||
process.stderr.write(
|
||||
`warning: quota refresh failed: ${err instanceof Error ? err.message : String(err)}\n`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (!snapshot) {
|
||||
const payload = {
|
||||
provider: "google-gemini-cli",
|
||||
ok: false,
|
||||
error:
|
||||
"No gemini-cli account snapshot — run `gemini auth login` and verify ~/.gemini/oauth_creds.json exists.",
|
||||
};
|
||||
process.stdout.write(
|
||||
options.json
|
||||
? `${JSON.stringify(payload)}\n`
|
||||
: `${payload.error}\n`,
|
||||
);
|
||||
return { exitCode: 1 };
|
||||
}
|
||||
const entries = getAllProviderQuotaEntries();
|
||||
|
||||
if (options.json) {
|
||||
process.stdout.write(
|
||||
`${JSON.stringify({ provider: "google-gemini-cli", ok: true, snapshot })}\n`,
|
||||
);
|
||||
const payload = {
|
||||
ok: true,
|
||||
providers: entries,
|
||||
unavailable: NO_API_PROVIDERS,
|
||||
};
|
||||
process.stdout.write(`${JSON.stringify(payload)}\n`);
|
||||
return { exitCode: 0 };
|
||||
}
|
||||
|
||||
const lines: string[] = [];
|
||||
lines.push("Gemini CLI usage");
|
||||
lines.push("Provider quota state");
|
||||
lines.push("");
|
||||
lines.push(` project: ${snapshot.projectId}`);
|
||||
if (snapshot.userTierId || snapshot.userTierName) {
|
||||
|
||||
const providerIds = Object.keys(entries).sort();
|
||||
if (providerIds.length === 0) {
|
||||
lines.push(
|
||||
` userTier: ${snapshot.userTierId ?? "?"}${snapshot.userTierName ? ` (${snapshot.userTierName})` : ""}`,
|
||||
" (no providers have a quota snapshot yet — check API keys are configured)",
|
||||
);
|
||||
}
|
||||
if (snapshot.paidTier?.id || snapshot.paidTier?.name) {
|
||||
lines.push(
|
||||
` paidTier: ${snapshot.paidTier.id ?? "?"}${snapshot.paidTier.name ? ` — ${snapshot.paidTier.name}` : ""}`,
|
||||
|
||||
for (const providerId of providerIds) {
|
||||
const entry = entries[providerId];
|
||||
if (!entry?.ok) {
|
||||
lines.push(
|
||||
` ${providerId.padEnd(20)} — error: ${entry?.error ?? "unknown"}`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
lines.push(` ${providerId} (fetched ${entry.fetchedAt})`);
|
||||
const windows = entry.windows ?? [];
|
||||
if (windows.length === 0) {
|
||||
lines.push(" (no windows reported)");
|
||||
continue;
|
||||
}
|
||||
const labelW = Math.max(
|
||||
16,
|
||||
...windows.map((w) => (w.label ?? "").length),
|
||||
);
|
||||
for (const w of windows) {
|
||||
const pct =
|
||||
typeof w.usedFraction === "number"
|
||||
? `${(w.usedFraction * 100).toFixed(1).padStart(5)}%`
|
||||
: " ? ";
|
||||
const reset = w.resetHint ? ` reset=${w.resetHint}` : "";
|
||||
lines.push(
|
||||
` ${String(w.label ?? "").padEnd(labelW)} used=${pct}${reset}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
lines.push("");
|
||||
lines.push(" Per-model quota:");
|
||||
const modelW = Math.max(
|
||||
20,
|
||||
...snapshot.models.map((m) => m.modelId.length),
|
||||
);
|
||||
for (const m of snapshot.models) {
|
||||
const usedPct = (m.usedFraction * 100).toFixed(1).padStart(5);
|
||||
const reset = m.resetTime ?? "-";
|
||||
lines.push(` ${m.modelId.padEnd(modelW)} used=${usedPct}% reset=${reset}`);
|
||||
lines.push("No public quota API for:");
|
||||
for (const p of NO_API_PROVIDERS) {
|
||||
lines.push(` ${p.id.padEnd(20)} ${p.reason}`);
|
||||
}
|
||||
|
||||
process.stdout.write(`${lines.join("\n")}\n`);
|
||||
return { exitCode: 0 };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -201,7 +201,16 @@ export function detectRogueFileWrites(unitType, unitId, basePath) {
|
|||
if (!summaryPath || !existsSync(summaryPath)) return [];
|
||||
const dbRow = getTask(mid, sid, tid);
|
||||
if (!dbRow || dbRow.status !== "complete") {
|
||||
rogues.push({ path: summaryPath, unitType, unitId });
|
||||
// Auto-remediate: SUMMARY exists on disk but DB is stale — sync DB to
|
||||
// match filesystem instead of reporting as rogue (#3633).
|
||||
// This handles the case where a commit landed without going through
|
||||
// complete_task (e.g. batch commit from IDE, subagent, or direct git).
|
||||
try {
|
||||
updateTaskStatus(mid, sid, tid, "complete", new Date().toISOString());
|
||||
} catch {
|
||||
// If DB update fails, fall back to rogue detection so the issue is visible
|
||||
rogues.push({ path: summaryPath, unitType, unitId });
|
||||
}
|
||||
}
|
||||
} else if (unitType === "complete-slice") {
|
||||
if (!mid || !sid) return [];
|
||||
|
|
|
|||
|
|
@ -560,6 +560,24 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
} catch {
|
||||
/* non-fatal — benchmark audit must never block session start */
|
||||
}
|
||||
// Refresh per-provider subscription quota state in the background.
|
||||
// Today this covers the 5 providers with documented introspection
|
||||
// endpoints (kimi-coding, openrouter, minimax, zai, google-gemini-cli);
|
||||
// other providers are absent from the cache and consumers treat
|
||||
// "no entry" as "unknown". TTL is 15 minutes — quota state changes
|
||||
// much faster than catalogs, so we refresh more aggressively.
|
||||
try {
|
||||
const { scheduleProviderQuotaRefresh } = await import(
|
||||
"../provider-quota-cache.js"
|
||||
);
|
||||
const { getKeyManagerAuthStorage } = await import("../key-manager.js");
|
||||
scheduleProviderQuotaRefresh(
|
||||
process.cwd(),
|
||||
getKeyManagerAuthStorage(),
|
||||
);
|
||||
} catch {
|
||||
/* non-fatal — quota refresh must never block session start */
|
||||
}
|
||||
// Detect drift in source-of-truth markdown files since last session.
|
||||
try {
|
||||
const { detectMdFileDrift, formatDriftReport } = await import(
|
||||
|
|
|
|||
388
src/resources/extensions/sf/provider-quota-cache.js
Normal file
388
src/resources/extensions/sf/provider-quota-cache.js
Normal file
|
|
@ -0,0 +1,388 @@
|
|||
/**
|
||||
* provider-quota-cache.js — fetch and cache subscription quota state per provider.
|
||||
*
|
||||
* For the five providers with a real quota introspection endpoint
|
||||
* (kimi-coding, openrouter, minimax, zai, google-gemini-cli) this module
|
||||
* fetches usage / remaining-quota data and caches it globally at
|
||||
* ~/.sf/provider-quota.json with a 15-minute TTL.
|
||||
*
|
||||
* The other allowed providers (mistral, ollama-cloud, opencode, opencode-go,
|
||||
* xiaomi) have no documented programmatic quota API today. They are simply
|
||||
* absent from the cache — consumers should treat "no entry" as "unknown" and
|
||||
* fall back to local-dispatch counters or accept routing without quota signal.
|
||||
*
|
||||
* Designed for two consumers:
|
||||
* - sf headless usage: display quota for all providers at once.
|
||||
* - routing layer (phase 2): bias dispatch toward under-used subs to
|
||||
* maximize subscription utilization ("spend the subs").
|
||||
*/
|
||||
import {
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
readFileSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { sfHome } from "./sf-home.js";
|
||||
|
||||
const QUOTA_TTL_MS = 15 * 60 * 1000;
|
||||
|
||||
/** Providers that have a documented quota endpoint we can call with an API key. */
|
||||
export const QUOTA_CAPABLE_PROVIDER_IDS = [
|
||||
"kimi-coding",
|
||||
"openrouter",
|
||||
"minimax",
|
||||
"zai",
|
||||
"google-gemini-cli",
|
||||
];
|
||||
|
||||
function quotaFilePath() {
|
||||
return join(sfHome(), "provider-quota.json");
|
||||
}
|
||||
|
||||
/**
|
||||
* @typedef {object} ProviderQuotaWindow
|
||||
* @property {string} label — human-readable name ("Weekly", "5h rolling", "Monthly MCP", ...)
|
||||
* @property {number} used — tokens / requests / credits consumed
|
||||
* @property {number} limit — window cap
|
||||
* @property {number} [usedFraction] — derived used/limit, clamped 0..1
|
||||
* @property {string} [resetHint] — ISO timestamp or "N hours" depending on provider
|
||||
*
|
||||
* @typedef {object} ProviderQuotaEntry
|
||||
* @property {boolean} ok — true if the most recent fetch succeeded
|
||||
* @property {string} fetchedAt — ISO timestamp of the last successful fetch
|
||||
* @property {string} [error] — error message when ok=false
|
||||
* @property {ProviderQuotaWindow[]} windows — one or more rolling/period windows
|
||||
* @property {Record<string, unknown>} [raw] — raw API response for debugging
|
||||
*/
|
||||
|
||||
function clampFraction(used, limit) {
|
||||
if (typeof used !== "number" || typeof limit !== "number" || limit <= 0) {
|
||||
return undefined;
|
||||
}
|
||||
return Math.max(0, Math.min(1, used / limit));
|
||||
}
|
||||
|
||||
// ─── Per-provider fetchers ───────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Kimi For Coding — GET https://api.kimi.com/coding/v1/usages with Bearer auth.
|
||||
* Shape (from MoonshotAI/kimi-cli usage.py):
|
||||
* { usage: {limit, used|remaining, name}, limits: [{detail:{...}, window:{duration, timeUnit}}] }
|
||||
*/
|
||||
async function fetchKimiCodingQuota(apiKey) {
|
||||
const res = await fetch("https://api.kimi.com/coding/v1/usages", {
|
||||
method: "GET",
|
||||
headers: { Authorization: `Bearer ${apiKey}` },
|
||||
});
|
||||
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
||||
const payload = await res.json();
|
||||
const windows = [];
|
||||
const summary = payload?.usage;
|
||||
if (summary && typeof summary === "object") {
|
||||
const limit = typeof summary.limit === "number" ? summary.limit : 0;
|
||||
let used = typeof summary.used === "number" ? summary.used : undefined;
|
||||
if (used === undefined && typeof summary.remaining === "number") {
|
||||
used = limit - summary.remaining;
|
||||
}
|
||||
if (typeof used === "number") {
|
||||
windows.push({
|
||||
label: String(summary.name ?? "Weekly limit"),
|
||||
used,
|
||||
limit,
|
||||
usedFraction: clampFraction(used, limit),
|
||||
});
|
||||
}
|
||||
}
|
||||
if (Array.isArray(payload?.limits)) {
|
||||
for (const item of payload.limits) {
|
||||
const detail = item?.detail ?? item;
|
||||
const window = item?.window;
|
||||
const limit = typeof detail?.limit === "number" ? detail.limit : 0;
|
||||
let used = typeof detail?.used === "number" ? detail.used : undefined;
|
||||
if (used === undefined && typeof detail?.remaining === "number") {
|
||||
used = limit - detail.remaining;
|
||||
}
|
||||
if (typeof used !== "number") continue;
|
||||
const winLabel =
|
||||
window?.duration && window?.timeUnit
|
||||
? `${window.duration} ${window.timeUnit}`
|
||||
: undefined;
|
||||
windows.push({
|
||||
label: String(detail?.name ?? winLabel ?? "rolling window"),
|
||||
used,
|
||||
limit,
|
||||
usedFraction: clampFraction(used, limit),
|
||||
});
|
||||
}
|
||||
}
|
||||
return { windows, raw: payload };
|
||||
}
|
||||
|
||||
/**
|
||||
* OpenRouter — GET https://openrouter.ai/api/v1/credits with Bearer auth.
|
||||
* Returns { data: { total_credits, total_usage } } where both are USD floats.
|
||||
* Used relative to the user's purchased credits, not a rolling window.
|
||||
*/
|
||||
async function fetchOpenrouterQuota(apiKey) {
|
||||
const res = await fetch("https://openrouter.ai/api/v1/credits", {
|
||||
method: "GET",
|
||||
headers: { Authorization: `Bearer ${apiKey}` },
|
||||
});
|
||||
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
||||
const payload = await res.json();
|
||||
const totalCredits = Number(payload?.data?.total_credits ?? 0);
|
||||
const totalUsage = Number(payload?.data?.total_usage ?? 0);
|
||||
const remaining = totalCredits - totalUsage;
|
||||
return {
|
||||
windows: [
|
||||
{
|
||||
label: "credits (USD)",
|
||||
used: totalUsage,
|
||||
limit: totalCredits || totalUsage, // avoid divide-by-zero
|
||||
usedFraction:
|
||||
totalCredits > 0
|
||||
? clampFraction(totalUsage, totalCredits)
|
||||
: undefined,
|
||||
resetHint: remaining >= 0 ? `${remaining.toFixed(2)} USD remaining` : undefined,
|
||||
},
|
||||
],
|
||||
raw: payload,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* MiniMax — GET https://api.minimax.io/v1/token_plan/remains with Bearer auth.
|
||||
* The 5h rolling window is the load-bearing limit; response shape varies but
|
||||
* commonly: { remaining_tokens, total_tokens, reset_time, ... }.
|
||||
*/
|
||||
async function fetchMinimaxQuota(apiKey) {
|
||||
const res = await fetch("https://api.minimax.io/v1/token_plan/remains", {
|
||||
method: "GET",
|
||||
headers: { Authorization: `Bearer ${apiKey}` },
|
||||
});
|
||||
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
||||
const payload = await res.json();
|
||||
const windows = [];
|
||||
const remaining =
|
||||
typeof payload?.remaining_tokens === "number"
|
||||
? payload.remaining_tokens
|
||||
: typeof payload?.remain === "number"
|
||||
? payload.remain
|
||||
: undefined;
|
||||
const total =
|
||||
typeof payload?.total_tokens === "number"
|
||||
? payload.total_tokens
|
||||
: typeof payload?.total === "number"
|
||||
? payload.total
|
||||
: undefined;
|
||||
if (typeof remaining === "number" && typeof total === "number") {
|
||||
const used = total - remaining;
|
||||
windows.push({
|
||||
label: "token plan",
|
||||
used,
|
||||
limit: total,
|
||||
usedFraction: clampFraction(used, total),
|
||||
resetHint: payload?.reset_time
|
||||
? String(payload.reset_time)
|
||||
: undefined,
|
||||
});
|
||||
}
|
||||
return { windows, raw: payload };
|
||||
}
|
||||
|
||||
/**
|
||||
* Z.AI — GET https://api.z.ai/api/monitor/usage/quota/limit with Bearer auth.
|
||||
* Returns 5h token limit + MCP monthly quota per the opencode-mystatus tool.
|
||||
* Field names vary; we accept common synonyms.
|
||||
*/
|
||||
async function fetchZaiQuota(apiKey) {
|
||||
const res = await fetch("https://api.z.ai/api/monitor/usage/quota/limit", {
|
||||
method: "GET",
|
||||
headers: { Authorization: `Bearer ${apiKey}` },
|
||||
});
|
||||
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
||||
const payload = await res.json();
|
||||
const windows = [];
|
||||
const buckets = Array.isArray(payload?.data)
|
||||
? payload.data
|
||||
: Array.isArray(payload?.limits)
|
||||
? payload.limits
|
||||
: Array.isArray(payload)
|
||||
? payload
|
||||
: [];
|
||||
for (const bucket of buckets) {
|
||||
if (!bucket || typeof bucket !== "object") continue;
|
||||
const limit =
|
||||
typeof bucket.limit === "number"
|
||||
? bucket.limit
|
||||
: typeof bucket.total === "number"
|
||||
? bucket.total
|
||||
: undefined;
|
||||
let used =
|
||||
typeof bucket.used === "number"
|
||||
? bucket.used
|
||||
: typeof bucket.consumed === "number"
|
||||
? bucket.consumed
|
||||
: undefined;
|
||||
if (used === undefined && typeof bucket.remaining === "number" && typeof limit === "number") {
|
||||
used = limit - bucket.remaining;
|
||||
}
|
||||
if (typeof used !== "number" || typeof limit !== "number") continue;
|
||||
windows.push({
|
||||
label: String(bucket.name ?? bucket.type ?? "quota"),
|
||||
used,
|
||||
limit,
|
||||
usedFraction: clampFraction(used, limit),
|
||||
resetHint: bucket.reset_time
|
||||
? String(bucket.reset_time)
|
||||
: bucket.resetAt
|
||||
? String(bucket.resetAt)
|
||||
: undefined,
|
||||
});
|
||||
}
|
||||
return { windows, raw: payload };
|
||||
}
|
||||
|
||||
/**
|
||||
* Google Gemini CLI — reuses snapshotGeminiCliAccount from the dedicated
|
||||
* provider package. Maps the per-model quota windows into the same shape as
|
||||
* the other providers' entries.
|
||||
*/
|
||||
async function fetchGeminiCliQuota(basePath) {
|
||||
const { snapshotGeminiCliAccount } = await import(
|
||||
"@singularity-forge/google-gemini-cli-provider"
|
||||
);
|
||||
const snapshot = await snapshotGeminiCliAccount(basePath);
|
||||
if (!snapshot) throw new Error("no snapshot — gemini-cli not signed in");
|
||||
const windows = (snapshot.models ?? []).map((m) => ({
|
||||
label: m.modelId,
|
||||
used: 1, // expressed as fraction directly
|
||||
limit: 1,
|
||||
usedFraction:
|
||||
typeof m.usedFraction === "number" ? m.usedFraction : undefined,
|
||||
resetHint: m.resetTime ? String(m.resetTime) : undefined,
|
||||
}));
|
||||
return { windows, raw: snapshot };
|
||||
}
|
||||
|
||||
const FETCHERS = {
|
||||
"kimi-coding": fetchKimiCodingQuota,
|
||||
openrouter: fetchOpenrouterQuota,
|
||||
minimax: fetchMinimaxQuota,
|
||||
zai: fetchZaiQuota,
|
||||
};
|
||||
|
||||
// ─── Cache I/O ───────────────────────────────────────────────────────────────
|
||||
|
||||
function loadCache() {
|
||||
const path = quotaFilePath();
|
||||
if (!existsSync(path)) return { schemaVersion: 1, providers: {} };
|
||||
try {
|
||||
const parsed = JSON.parse(readFileSync(path, "utf-8"));
|
||||
if (parsed?.schemaVersion === 1 && parsed?.providers) return parsed;
|
||||
return { schemaVersion: 1, providers: {} };
|
||||
} catch {
|
||||
return { schemaVersion: 1, providers: {} };
|
||||
}
|
||||
}
|
||||
|
||||
function saveCache(cache) {
|
||||
const path = quotaFilePath();
|
||||
try {
|
||||
mkdirSync(dirname(path), { recursive: true });
|
||||
writeFileSync(path, `${JSON.stringify(cache, null, 2)}\n`, "utf-8");
|
||||
} catch {
|
||||
// Best-effort.
|
||||
}
|
||||
}
|
||||
|
||||
function isFresh(entry) {
|
||||
if (!entry?.fetchedAt) return false;
|
||||
return Date.now() - new Date(entry.fetchedAt).getTime() <= QUOTA_TTL_MS;
|
||||
}
|
||||
|
||||
// ─── Public API ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Read the cached quota entry for a provider.
|
||||
* Returns null if no entry exists or the entry is older than QUOTA_TTL_MS.
|
||||
*
|
||||
* @param {string} providerId
|
||||
* @returns {ProviderQuotaEntry | null}
|
||||
*/
|
||||
export function getProviderQuotaState(providerId) {
|
||||
const cache = loadCache();
|
||||
const entry = cache.providers?.[providerId];
|
||||
if (!entry) return null;
|
||||
if (!isFresh(entry)) return null;
|
||||
return entry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read all cached quota entries, fresh or stale, for display purposes.
|
||||
* @returns {Record<string, ProviderQuotaEntry>}
|
||||
*/
|
||||
export function getAllProviderQuotaEntries() {
|
||||
return loadCache().providers ?? {};
|
||||
}
|
||||
|
||||
/**
|
||||
* Awaitable refresh for all quota-capable providers that have a key in `auth`.
|
||||
* Skips fresh entries (within TTL). Per-provider failures are recorded as
|
||||
* { ok: false, error } but never thrown.
|
||||
*
|
||||
* @param {string} basePath — passed through to gemini-cli snapshot helper
|
||||
* @param {{ getCredentialsForProvider: (id: string) => Array<{type: string, key?: string}> }} auth
|
||||
*/
|
||||
export async function runProviderQuotaRefreshIfStale(basePath, auth) {
|
||||
const cache = loadCache();
|
||||
cache.providers = cache.providers ?? {};
|
||||
for (const providerId of QUOTA_CAPABLE_PROVIDER_IDS) {
|
||||
if (isFresh(cache.providers[providerId])) continue;
|
||||
const now = new Date().toISOString();
|
||||
try {
|
||||
let result;
|
||||
if (providerId === "google-gemini-cli") {
|
||||
result = await fetchGeminiCliQuota(basePath);
|
||||
} else {
|
||||
const fetcher = FETCHERS[providerId];
|
||||
if (!fetcher) continue;
|
||||
const creds = auth?.getCredentialsForProvider?.(providerId) ?? [];
|
||||
const apiKey = creds.find((c) => c.type === "api_key" && c.key)?.key;
|
||||
if (!apiKey) {
|
||||
cache.providers[providerId] = {
|
||||
ok: false,
|
||||
fetchedAt: now,
|
||||
error: "no api key configured",
|
||||
windows: [],
|
||||
};
|
||||
continue;
|
||||
}
|
||||
result = await fetcher(apiKey);
|
||||
}
|
||||
cache.providers[providerId] = {
|
||||
ok: true,
|
||||
fetchedAt: now,
|
||||
windows: result.windows ?? [],
|
||||
raw: result.raw,
|
||||
};
|
||||
} catch (err) {
|
||||
cache.providers[providerId] = {
|
||||
ok: false,
|
||||
fetchedAt: now,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
windows: [],
|
||||
};
|
||||
}
|
||||
}
|
||||
saveCache(cache);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fire-and-forget background refresh — safe to call at session_start.
|
||||
* Never throws, never blocks.
|
||||
*/
|
||||
export function scheduleProviderQuotaRefresh(basePath, auth) {
|
||||
setImmediate(() => runProviderQuotaRefreshIfStale(basePath, auth));
|
||||
}
|
||||
|
|
@ -26,6 +26,7 @@ import {
|
|||
formatMemoriesForPrompt,
|
||||
getActiveMemoriesRanked,
|
||||
} from "../memory-store.js";
|
||||
import { debugLog } from "../debug-logger.js";
|
||||
import { AgentSwarm } from "./agent-swarm.js";
|
||||
import { MessageBus } from "./message-bus.js";
|
||||
import { createDefaultSwarm } from "./swarm-roles.js";
|
||||
|
|
@ -407,13 +408,29 @@ export class SwarmDispatchLayer {
|
|||
}
|
||||
|
||||
// Step 1: Enqueue via existing _busDispatch
|
||||
debugLog("swarm-dispatch", {
|
||||
phase: "before-busDispatch",
|
||||
unitType: envelope.unitType,
|
||||
unitId: envelope.unitId,
|
||||
});
|
||||
const dispatchResult = await this._busDispatch(envelope);
|
||||
debugLog("swarm-dispatch", {
|
||||
phase: "after-busDispatch",
|
||||
unitType: envelope.unitType,
|
||||
unitId: envelope.unitId,
|
||||
targetAgent: dispatchResult.targetAgent,
|
||||
messageId: dispatchResult.messageId,
|
||||
});
|
||||
|
||||
// Step 2: Look up the target agent in the swarm
|
||||
const swarm = await this.getOrCreateSwarm();
|
||||
const agent = swarm.get(dispatchResult.targetAgent);
|
||||
|
||||
if (!agent) {
|
||||
debugLog("swarm-dispatch", {
|
||||
phase: "agent-not-found",
|
||||
targetAgent: dispatchResult.targetAgent,
|
||||
});
|
||||
return {
|
||||
...dispatchResult,
|
||||
reply: null,
|
||||
|
|
@ -421,6 +438,11 @@ export class SwarmDispatchLayer {
|
|||
error: `dispatchAndWait: target agent "${dispatchResult.targetAgent}" not found in swarm`,
|
||||
};
|
||||
}
|
||||
debugLog("swarm-dispatch", {
|
||||
phase: "before-runAgentTurn",
|
||||
targetAgent: dispatchResult.targetAgent,
|
||||
messageId: dispatchResult.messageId,
|
||||
});
|
||||
|
||||
// Step 3: Drive one inbox-processing turn via runAgentTurn (in-process).
|
||||
// Pass onlyMessageId so runAgentTurn forces an inbox refresh (catching
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue