sf snapshot: uncommitted changes after 30m inactivity

This commit is contained in:
Mikael Hugo 2026-05-16 17:30:35 +02:00
parent b5764af27b
commit effbb75f83
6 changed files with 529 additions and 62 deletions

View file

@ -801,15 +801,20 @@ if (cliFlags.maintain) {
const { computeBenchmarkCoverage, writeBenchmarkCoverage } = await import(
"./resources/extensions/sf/benchmark-coverage.js"
);
await runModelCatalogRefreshIfStale(process.cwd(), getKeyManagerAuthStorage());
const { runProviderQuotaRefreshIfStale } = await import(
"./resources/extensions/sf/provider-quota-cache.js"
);
const auth = getKeyManagerAuthStorage();
await runModelCatalogRefreshIfStale(process.cwd(), auth);
await runGeminiCatalogRefreshIfStale(process.cwd());
await runOpenaiCodexCatalogRefreshIfStale(process.cwd());
await runProviderQuotaRefreshIfStale(process.cwd(), auth);
const prefs = loadEffectiveSFPreferences()?.preferences ?? {};
const coverage = computeBenchmarkCoverage(prefs);
writeBenchmarkCoverage(coverage);
const ms = Date.now() - startedAt;
process.stdout.write(
`[sf --maintain] catalog refresh + coverage audit done in ${ms}ms — coverage ${coverage.summary.coveredCount}/${coverage.summary.total} (${coverage.uncovered.length} uncovered)\n`,
`[sf --maintain] catalog + quota refresh + coverage audit done in ${ms}ms — coverage ${coverage.summary.coveredCount}/${coverage.summary.total} (${coverage.uncovered.length} uncovered)\n`,
);
} catch (err) {
process.stderr.write(

View file

@ -1,23 +1,26 @@
/**
 * headless-usage.ts — `sf headless usage`
*
* Purpose: expose live LLM-provider usage data (account tier, project, per-model
* quota usage with reset windows) via the headless CLI so operators and CI can
* see capacity state without launching the interactive UI.
* Live LLM-provider subscription quota state for every provider with a
* documented introspection endpoint:
*
* Today this covers the gemini-cli provider (the most quota-sensitive surface
* because of AI Ultra's per-model windowed quotas). Other providers can be
* added by extending the snapshot helper as their introspection APIs are
* wired into dedicated provider packages.
* - kimi-coding GET https://api.kimi.com/coding/v1/usages
* - openrouter GET https://openrouter.ai/api/v1/credits
* - minimax GET https://api.minimax.io/v1/token_plan/remains
* - zai GET https://api.z.ai/api/monitor/usage/quota/limit
* - google-gemini-cli via snapshotGeminiCliAccount (OAuth Code Assist)
*
* Each call goes through `provider-quota-cache.js` which writes a unified
* representation to ~/.sf/provider-quota.json (15-minute TTL). This command
* forces a refresh, then prints either a compact human table or JSON.
*
* Providers without a documented quota endpoint (mistral, ollama-cloud,
* opencode, opencode-go, xiaomi) are listed as "unavailable" with a short
* note so users see exactly which subs SF can introspect today.
*
* Consumer: headless.ts when command === "usage".
*/
import {
type GeminiAccountSnapshot,
snapshotGeminiCliAccount,
} from "@singularity-forge/google-gemini-cli-provider";
export interface HandleUsageOptions {
json?: boolean;
}
@ -26,77 +29,99 @@ export interface HandleUsageResult {
exitCode: number;
}
/**
 * Subscription providers SF cannot introspect today — each entry carries a
 * short human-readable reason (no documented quota endpoint, or a
 * dashboard-only surface) that is surfaced verbatim in the usage output.
 */
const NO_API_PROVIDERS: ReadonlyArray<{ id: string; reason: string }> = [
  { id: "mistral", reason: "no public quota endpoint — console.mistral.ai" },
  { id: "ollama-cloud", reason: "WorkOS dashboard only — ollama.com/settings" },
  { id: "opencode", reason: "no public quota endpoint" },
  { id: "opencode-go", reason: "no public quota endpoint" },
  { id: "xiaomi", reason: "no public quota endpoint — platform.xiaomimimo.com" },
];
/**
* Render a snapshot as a compact text table (default) or as JSON for machine
* consumers. Always writes to stdout; never throws.
* Render the unified provider-quota snapshot as a compact text table (default)
* or as JSON for machine consumers. Always writes to stdout; never throws.
*/
export async function handleUsage(
cwd: string,
options: HandleUsageOptions = {},
): Promise<HandleUsageResult> {
let snapshot: GeminiAccountSnapshot | null;
const { runProviderQuotaRefreshIfStale, getAllProviderQuotaEntries } =
await import("./resources/extensions/sf/provider-quota-cache.js");
const { getKeyManagerAuthStorage } = await import(
"./resources/extensions/sf/key-manager.js"
);
const auth = getKeyManagerAuthStorage();
try {
snapshot = await snapshotGeminiCliAccount(cwd);
await runProviderQuotaRefreshIfStale(cwd, auth);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
const payload = {
provider: "google-gemini-cli",
ok: false,
error: msg,
};
process.stdout.write(
options.json ? `${JSON.stringify(payload)}\n` : `error: ${msg}\n`,
);
return { exitCode: 1 };
// Fall through to display whatever's cached, even on refresh failure.
if (!options.json) {
process.stderr.write(
`warning: quota refresh failed: ${err instanceof Error ? err.message : String(err)}\n`,
);
}
}
if (!snapshot) {
const payload = {
provider: "google-gemini-cli",
ok: false,
error:
"No gemini-cli account snapshot — run `gemini auth login` and verify ~/.gemini/oauth_creds.json exists.",
};
process.stdout.write(
options.json
? `${JSON.stringify(payload)}\n`
: `${payload.error}\n`,
);
return { exitCode: 1 };
}
const entries = getAllProviderQuotaEntries();
if (options.json) {
process.stdout.write(
`${JSON.stringify({ provider: "google-gemini-cli", ok: true, snapshot })}\n`,
);
const payload = {
ok: true,
providers: entries,
unavailable: NO_API_PROVIDERS,
};
process.stdout.write(`${JSON.stringify(payload)}\n`);
return { exitCode: 0 };
}
const lines: string[] = [];
lines.push("Gemini CLI usage");
lines.push("Provider quota state");
lines.push("");
lines.push(` project: ${snapshot.projectId}`);
if (snapshot.userTierId || snapshot.userTierName) {
const providerIds = Object.keys(entries).sort();
if (providerIds.length === 0) {
lines.push(
` userTier: ${snapshot.userTierId ?? "?"}${snapshot.userTierName ? ` (${snapshot.userTierName})` : ""}`,
" (no providers have a quota snapshot yet — check API keys are configured)",
);
}
if (snapshot.paidTier?.id || snapshot.paidTier?.name) {
lines.push(
` paidTier: ${snapshot.paidTier.id ?? "?"}${snapshot.paidTier.name ? `${snapshot.paidTier.name}` : ""}`,
for (const providerId of providerIds) {
const entry = entries[providerId];
if (!entry?.ok) {
lines.push(
` ${providerId.padEnd(20)} — error: ${entry?.error ?? "unknown"}`,
);
continue;
}
lines.push(` ${providerId} (fetched ${entry.fetchedAt})`);
const windows = entry.windows ?? [];
if (windows.length === 0) {
lines.push(" (no windows reported)");
continue;
}
const labelW = Math.max(
16,
...windows.map((w) => (w.label ?? "").length),
);
for (const w of windows) {
const pct =
typeof w.usedFraction === "number"
? `${(w.usedFraction * 100).toFixed(1).padStart(5)}%`
: " ? ";
const reset = w.resetHint ? ` reset=${w.resetHint}` : "";
lines.push(
` ${String(w.label ?? "").padEnd(labelW)} used=${pct}${reset}`,
);
}
}
lines.push("");
lines.push(" Per-model quota:");
const modelW = Math.max(
20,
...snapshot.models.map((m) => m.modelId.length),
);
for (const m of snapshot.models) {
const usedPct = (m.usedFraction * 100).toFixed(1).padStart(5);
const reset = m.resetTime ?? "-";
lines.push(` ${m.modelId.padEnd(modelW)} used=${usedPct}% reset=${reset}`);
lines.push("No public quota API for:");
for (const p of NO_API_PROVIDERS) {
lines.push(` ${p.id.padEnd(20)} ${p.reason}`);
}
process.stdout.write(`${lines.join("\n")}\n`);
return { exitCode: 0 };
}

View file

@ -201,7 +201,16 @@ export function detectRogueFileWrites(unitType, unitId, basePath) {
if (!summaryPath || !existsSync(summaryPath)) return [];
const dbRow = getTask(mid, sid, tid);
if (!dbRow || dbRow.status !== "complete") {
rogues.push({ path: summaryPath, unitType, unitId });
// Auto-remediate: SUMMARY exists on disk but DB is stale — sync DB to
// match filesystem instead of reporting as rogue (#3633).
// This handles the case where a commit landed without going through
// complete_task (e.g. batch commit from IDE, subagent, or direct git).
try {
updateTaskStatus(mid, sid, tid, "complete", new Date().toISOString());
} catch {
// If DB update fails, fall back to rogue detection so the issue is visible
rogues.push({ path: summaryPath, unitType, unitId });
}
}
} else if (unitType === "complete-slice") {
if (!mid || !sid) return [];

View file

@ -560,6 +560,24 @@ export function registerHooks(pi, ecosystemHandlers = []) {
} catch {
/* non-fatal — benchmark audit must never block session start */
}
// Refresh per-provider subscription quota state in the background.
// Today this covers the 5 providers with documented introspection
// endpoints (kimi-coding, openrouter, minimax, zai, google-gemini-cli);
// other providers are absent from the cache and consumers treat
// "no entry" as "unknown". TTL is 15 minutes — quota state changes
// much faster than catalogs, so we refresh more aggressively.
try {
const { scheduleProviderQuotaRefresh } = await import(
"../provider-quota-cache.js"
);
const { getKeyManagerAuthStorage } = await import("../key-manager.js");
scheduleProviderQuotaRefresh(
process.cwd(),
getKeyManagerAuthStorage(),
);
} catch {
/* non-fatal — quota refresh must never block session start */
}
// Detect drift in source-of-truth markdown files since last session.
try {
const { detectMdFileDrift, formatDriftReport } = await import(

View file

@ -0,0 +1,388 @@
/**
 * provider-quota-cache.js — fetch and cache subscription quota state per provider.
*
* For the five providers with a real quota introspection endpoint
* (kimi-coding, openrouter, minimax, zai, google-gemini-cli) this module
* fetches usage / remaining-quota data and caches it globally at
* ~/.sf/provider-quota.json with a 15-minute TTL.
*
* The other allowed providers (mistral, ollama-cloud, opencode, opencode-go,
* xiaomi) have no documented programmatic quota API today. They are simply
 * absent from the cache — consumers should treat "no entry" as "unknown" and
* fall back to local-dispatch counters or accept routing without quota signal.
*
* Designed for two consumers:
* - sf headless usage: display quota for all providers at once.
* - routing layer (phase 2): bias dispatch toward under-used subs to
* maximize subscription utilization ("spend the subs").
*/
import {
existsSync,
mkdirSync,
readFileSync,
writeFileSync,
} from "node:fs";
import { dirname, join } from "node:path";
import { sfHome } from "./sf-home.js";
// Quota state changes much faster than model catalogs, so the cache TTL is
// short (15 minutes).
const QUOTA_TTL_MS = 15 * 60 * 1000;
/** Providers that have a documented quota endpoint we can call with an API key. */
export const QUOTA_CAPABLE_PROVIDER_IDS = [
  "kimi-coding",
  "openrouter",
  "minimax",
  "zai",
  "google-gemini-cli",
];
/** Absolute path of the shared quota cache file under the SF home directory. */
function quotaFilePath() {
  const fileName = "provider-quota.json";
  return join(sfHome(), fileName);
}
/**
 * @typedef {object} ProviderQuotaWindow
 * @property {string} label — human-readable name ("Weekly", "5h rolling", "Monthly MCP", ...)
 * @property {number} used — tokens / requests / credits consumed
 * @property {number} limit — window cap
 * @property {number} [usedFraction] — derived used/limit, clamped 0..1
 * @property {string} [resetHint] — ISO timestamp or "N hours" depending on provider
 *
 * @typedef {object} ProviderQuotaEntry
 * @property {boolean} ok — true if the most recent fetch succeeded
 * @property {string} fetchedAt — ISO timestamp of the last successful fetch
 * @property {string} [error] — error message when ok=false
 * @property {ProviderQuotaWindow[]} windows — one or more rolling/period windows
 * @property {Record<string, unknown>} [raw] — raw API response for debugging
 */
/**
 * Derive used/limit as a fraction clamped to the [0, 1] range.
 * Returns undefined when either input is non-numeric or the limit is not
 * positive (so callers can render "unknown" instead of a bogus percentage).
 */
function clampFraction(used, limit) {
  const usable =
    typeof used === "number" && typeof limit === "number" && limit > 0;
  if (!usable) return undefined;
  const fraction = used / limit;
  if (fraction < 0) return 0;
  if (fraction > 1) return 1;
  return fraction;
}
// ─── Per-provider fetchers ───────────────────────────────────────────────────
/**
 * Kimi For Coding — GET https://api.kimi.com/coding/v1/usages with Bearer auth.
 * Shape (from MoonshotAI/kimi-cli usage.py):
 * { usage: {limit, used|remaining, name}, limits: [{detail:{...}, window:{duration, timeUnit}}] }
 *
 * Returns { windows, raw }; throws on any non-2xx HTTP status so the caller
 * records an ok=false entry.
 */
async function fetchKimiCodingQuota(apiKey) {
  const res = await fetch("https://api.kimi.com/coding/v1/usages", {
    method: "GET",
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  const payload = await res.json();
  const windows = [];
  // Top-level summary window (typically the overall plan limit).
  const summary = payload?.usage;
  if (summary && typeof summary === "object") {
    // NOTE(review): a missing `limit` defaults to 0, so a remaining-only
    // summary would yield a negative `used` (and an undefined usedFraction
    // because clampFraction rejects limit<=0) — confirm the API always
    // includes `limit` alongside `remaining`.
    const limit = typeof summary.limit === "number" ? summary.limit : 0;
    let used = typeof summary.used === "number" ? summary.used : undefined;
    if (used === undefined && typeof summary.remaining === "number") {
      used = limit - summary.remaining;
    }
    if (typeof used === "number") {
      windows.push({
        label: String(summary.name ?? "Weekly limit"),
        used,
        limit,
        usedFraction: clampFraction(used, limit),
      });
    }
  }
  // Additional per-window limits; each item may nest its numbers under
  // `detail` and describe its period via `window: {duration, timeUnit}`.
  if (Array.isArray(payload?.limits)) {
    for (const item of payload.limits) {
      const detail = item?.detail ?? item;
      const window = item?.window;
      const limit = typeof detail?.limit === "number" ? detail.limit : 0;
      let used = typeof detail?.used === "number" ? detail.used : undefined;
      if (used === undefined && typeof detail?.remaining === "number") {
        used = limit - detail.remaining;
      }
      if (typeof used !== "number") continue;
      // Label preference: explicit name, then "<duration> <timeUnit>", then a
      // generic fallback.
      const winLabel =
        window?.duration && window?.timeUnit
          ? `${window.duration} ${window.timeUnit}`
          : undefined;
      windows.push({
        label: String(detail?.name ?? winLabel ?? "rolling window"),
        used,
        limit,
        usedFraction: clampFraction(used, limit),
      });
    }
  }
  return { windows, raw: payload };
}
/**
 * OpenRouter — GET https://openrouter.ai/api/v1/credits with Bearer auth.
 * Returns { data: { total_credits, total_usage } } where both are USD floats.
 * Usage is measured against the user's purchased credits, not a rolling
 * window. Throws on any non-2xx HTTP status.
 */
async function fetchOpenrouterQuota(apiKey) {
  const response = await fetch("https://openrouter.ai/api/v1/credits", {
    method: "GET",
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!response.ok) throw new Error(`HTTP ${response.status}`);
  const payload = await response.json();
  const credits = Number(payload?.data?.total_credits ?? 0);
  const usage = Number(payload?.data?.total_usage ?? 0);
  const left = credits - usage;
  const window = {
    label: "credits (USD)",
    used: usage,
    // Fall back to `usage` as the cap so a zero-credit account never records
    // a 0 limit (avoid divide-by-zero downstream).
    limit: credits || usage,
    usedFraction: credits > 0 ? clampFraction(usage, credits) : undefined,
    resetHint: left >= 0 ? `${left.toFixed(2)} USD remaining` : undefined,
  };
  return { windows: [window], raw: payload };
}
/**
 * MiniMax — GET https://api.minimax.io/v1/token_plan/remains with Bearer auth.
 * The 5h rolling window is the load-bearing limit; response shape varies but
 * commonly: { remaining_tokens, total_tokens, reset_time, ... }.
 * Throws on any non-2xx HTTP status.
 */
async function fetchMinimaxQuota(apiKey) {
  const response = await fetch("https://api.minimax.io/v1/token_plan/remains", {
    method: "GET",
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!response.ok) throw new Error(`HTTP ${response.status}`);
  const payload = await response.json();
  // Accept both long and short field names seen across API versions.
  const firstNumber = (...candidates) =>
    candidates.find((value) => typeof value === "number");
  const remaining = firstNumber(payload?.remaining_tokens, payload?.remain);
  const total = firstNumber(payload?.total_tokens, payload?.total);
  const windows = [];
  if (remaining !== undefined && total !== undefined) {
    const used = total - remaining;
    windows.push({
      label: "token plan",
      used,
      limit: total,
      usedFraction: clampFraction(used, total),
      resetHint: payload?.reset_time ? String(payload.reset_time) : undefined,
    });
  }
  return { windows, raw: payload };
}
/**
 * Z.AI — GET https://api.z.ai/api/monitor/usage/quota/limit with Bearer auth.
 * Returns 5h token limit + MCP monthly quota per the opencode-mystatus tool.
 * Field names vary; we accept common synonyms. Throws on non-2xx status.
 */
async function fetchZaiQuota(apiKey) {
  const response = await fetch("https://api.z.ai/api/monitor/usage/quota/limit", {
    method: "GET",
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!response.ok) throw new Error(`HTTP ${response.status}`);
  const payload = await response.json();
  // The bucket list may live under .data, under .limits, or be the root array.
  let buckets = [];
  if (Array.isArray(payload?.data)) buckets = payload.data;
  else if (Array.isArray(payload?.limits)) buckets = payload.limits;
  else if (Array.isArray(payload)) buckets = payload;
  const asNumber = (value) => (typeof value === "number" ? value : undefined);
  const windows = [];
  for (const bucket of buckets) {
    if (!bucket || typeof bucket !== "object") continue;
    // Synonyms: limit/total for the cap, used/consumed for consumption, and
    // remaining as a derived fallback when only the cap is known.
    const limit = asNumber(bucket.limit) ?? asNumber(bucket.total);
    let used = asNumber(bucket.used) ?? asNumber(bucket.consumed);
    if (used === undefined && limit !== undefined) {
      const remaining = asNumber(bucket.remaining);
      if (remaining !== undefined) used = limit - remaining;
    }
    if (used === undefined || limit === undefined) continue;
    const resetHint = bucket.reset_time
      ? String(bucket.reset_time)
      : bucket.resetAt
        ? String(bucket.resetAt)
        : undefined;
    windows.push({
      label: String(bucket.name ?? bucket.type ?? "quota"),
      used,
      limit,
      usedFraction: clampFraction(used, limit),
      resetHint,
    });
  }
  return { windows, raw: payload };
}
/**
 * Google Gemini CLI — reuses snapshotGeminiCliAccount from the dedicated
 * provider package. Maps the per-model quota windows into the same shape as
 * the other providers' entries; throws when no account snapshot exists.
 */
async function fetchGeminiCliQuota(basePath) {
  const { snapshotGeminiCliAccount } = await import(
    "@singularity-forge/google-gemini-cli-provider"
  );
  const snapshot = await snapshotGeminiCliAccount(basePath);
  if (!snapshot) throw new Error("no snapshot — gemini-cli not signed in");
  const models = snapshot.models ?? [];
  const windows = models.map((model) => {
    const fraction =
      typeof model.usedFraction === "number" ? model.usedFraction : undefined;
    return {
      label: model.modelId,
      // The snapshot already reports a fraction, so used/limit are a nominal
      // 1/1 and usedFraction carries the real signal.
      used: 1,
      limit: 1,
      usedFraction: fraction,
      resetHint: model.resetTime ? String(model.resetTime) : undefined,
    };
  });
  return { windows, raw: snapshot };
}
// API-key-based fetchers keyed by provider id. google-gemini-cli is handled
// separately in runProviderQuotaRefreshIfStale because it needs a base path
// and an OAuth snapshot rather than an API key.
const FETCHERS = {
  "kimi-coding": fetchKimiCodingQuota,
  openrouter: fetchOpenrouterQuota,
  minimax: fetchMinimaxQuota,
  zai: fetchZaiQuota,
};
// ─── Cache I/O ───────────────────────────────────────────────────────────────
/**
 * Read the cache file from disk. Returns an empty v1 cache when the file is
 * missing, unparsable, or carries an unexpected schema version.
 */
function loadCache() {
  const emptyCache = () => ({ schemaVersion: 1, providers: {} });
  const path = quotaFilePath();
  if (!existsSync(path)) return emptyCache();
  try {
    const parsed = JSON.parse(readFileSync(path, "utf-8"));
    const valid = parsed?.schemaVersion === 1 && parsed?.providers;
    return valid ? parsed : emptyCache();
  } catch {
    return emptyCache();
  }
}
/** Best-effort write of the cache file; I/O failures are swallowed. */
function saveCache(cache) {
  const path = quotaFilePath();
  try {
    mkdirSync(dirname(path), { recursive: true });
    const body = `${JSON.stringify(cache, null, 2)}\n`;
    writeFileSync(path, body, "utf-8");
  } catch {
    // Best-effort.
  }
}
/** True when the entry has a fetchedAt timestamp within the TTL window. */
function isFresh(entry) {
  const fetchedAt = entry?.fetchedAt;
  if (!fetchedAt) return false;
  const ageMs = Date.now() - new Date(fetchedAt).getTime();
  return ageMs <= QUOTA_TTL_MS;
}
// ─── Public API ──────────────────────────────────────────────────────────────
/**
 * Read the cached quota entry for a provider.
 * Returns null if no entry exists or the entry is older than QUOTA_TTL_MS.
 *
 * @param {string} providerId
 * @returns {ProviderQuotaEntry | null}
 */
export function getProviderQuotaState(providerId) {
  const entry = loadCache().providers?.[providerId];
  return entry && isFresh(entry) ? entry : null;
}
/**
 * Read every cached quota entry — fresh or stale — for display purposes.
 * @returns {Record<string, ProviderQuotaEntry>}
 */
export function getAllProviderQuotaEntries() {
  const cache = loadCache();
  return cache.providers ?? {};
}
/**
 * Awaitable refresh for all quota-capable providers that have a key in `auth`.
 * Skips fresh entries (within TTL). Per-provider failures are recorded as
 * { ok: false, error } but never thrown.
 *
 * On failure (fetch error or missing key) the previous entry's windows are
 * preserved in the ok=false record, so consumers that deliberately fall back
 * to cached data (e.g. `sf headless usage`) keep the last-known-good numbers
 * instead of an empty list.
 *
 * @param {string} basePath passed through to gemini-cli snapshot helper
 * @param {{ getCredentialsForProvider: (id: string) => Array<{type: string, key?: string}> }} auth
 */
export async function runProviderQuotaRefreshIfStale(basePath, auth) {
  const cache = loadCache();
  cache.providers = cache.providers ?? {};
  for (const providerId of QUOTA_CAPABLE_PROVIDER_IDS) {
    const previous = cache.providers[providerId];
    if (isFresh(previous)) continue;
    const now = new Date().toISOString();
    // Failure entry that retains the last fetched windows for display fallback.
    const failed = (error) => ({
      ok: false,
      fetchedAt: now,
      error,
      windows: previous?.windows ?? [],
    });
    try {
      let result;
      if (providerId === "google-gemini-cli") {
        // OAuth-based: needs the workspace path, not an API key.
        result = await fetchGeminiCliQuota(basePath);
      } else {
        const fetcher = FETCHERS[providerId];
        if (!fetcher) continue;
        const creds = auth?.getCredentialsForProvider?.(providerId) ?? [];
        const apiKey = creds.find((c) => c.type === "api_key" && c.key)?.key;
        if (!apiKey) {
          cache.providers[providerId] = failed("no api key configured");
          continue;
        }
        result = await fetcher(apiKey);
      }
      cache.providers[providerId] = {
        ok: true,
        fetchedAt: now,
        windows: result.windows ?? [],
        raw: result.raw,
      };
    } catch (err) {
      cache.providers[providerId] = failed(
        err instanceof Error ? err.message : String(err),
      );
    }
  }
  saveCache(cache);
}
/**
 * Fire-and-forget background refresh — safe to call at session_start.
 * Never throws, never blocks.
 */
export function scheduleProviderQuotaRefresh(basePath, auth) {
  setImmediate(() => {
    // Swallow any rejection: a background refresh must never surface as an
    // unhandled promise rejection (which can crash the host process), even
    // though runProviderQuotaRefreshIfStale is designed not to throw.
    runProviderQuotaRefreshIfStale(basePath, auth).catch(() => {});
  });
}

View file

@ -26,6 +26,7 @@ import {
formatMemoriesForPrompt,
getActiveMemoriesRanked,
} from "../memory-store.js";
import { debugLog } from "../debug-logger.js";
import { AgentSwarm } from "./agent-swarm.js";
import { MessageBus } from "./message-bus.js";
import { createDefaultSwarm } from "./swarm-roles.js";
@ -407,13 +408,29 @@ export class SwarmDispatchLayer {
}
// Step 1: Enqueue via existing _busDispatch
debugLog("swarm-dispatch", {
phase: "before-busDispatch",
unitType: envelope.unitType,
unitId: envelope.unitId,
});
const dispatchResult = await this._busDispatch(envelope);
debugLog("swarm-dispatch", {
phase: "after-busDispatch",
unitType: envelope.unitType,
unitId: envelope.unitId,
targetAgent: dispatchResult.targetAgent,
messageId: dispatchResult.messageId,
});
// Step 2: Look up the target agent in the swarm
const swarm = await this.getOrCreateSwarm();
const agent = swarm.get(dispatchResult.targetAgent);
if (!agent) {
debugLog("swarm-dispatch", {
phase: "agent-not-found",
targetAgent: dispatchResult.targetAgent,
});
return {
...dispatchResult,
reply: null,
@ -421,6 +438,11 @@ export class SwarmDispatchLayer {
error: `dispatchAndWait: target agent "${dispatchResult.targetAgent}" not found in swarm`,
};
}
debugLog("swarm-dispatch", {
phase: "before-runAgentTurn",
targetAgent: dispatchResult.targetAgent,
messageId: dispatchResult.messageId,
});
// Step 3: Drive one inbox-processing turn via runAgentTurn (in-process).
// Pass onlyMessageId so runAgentTurn forces an inbox refresh (catching