diff --git a/TODO.md b/TODO.md deleted file mode 100644 index d70aefca9..000000000 --- a/TODO.md +++ /dev/null @@ -1,41 +0,0 @@ -# TODO - -Dump anything here. - ---- - -## Self-Feedback Inbox - -### [prompt-modularization] Phase 3 — migrate remaining builders to `composeUnitContext` v2 - -**Context:** Phase 1 (fragment infrastructure, 17-prompt Working Directory deduplication) and -Phase 2 (5 stub manifests for deploy/smoke-production/release/rollback/challenge) shipped in -commit `ca5d869e3`. 9 of 26 unit types are now fully manifest-driven via `composeInlinedContext`. - -**What's blocked and why:** - -Migrating the remaining 17 builders to `composeInlinedContext` (v1) is the wrong path because: -1. `inlineKnowledgeScoped` and `inlineGraphSubgraph` are NOT in `ARTIFACT_KEYS` — these - artifacts would remain imperative and undeclared in every manifest, making manifests - structurally unreliable descriptions of actual builder behavior. -2. Injecting knowledge/graph at the right position in the composed string requires fragile - sentinel-string searches (e.g., `body.lastIndexOf("### Task Summary:")`). This pattern - is already untested in the 2 migrated complex builders (`research-milestone`, `complete-slice`). -3. `composeUnitContext` (v2) in `unit-context-composer.js` already has `computed`, `prepend`, - and `excerpt` support — knowledge and graph inlining maps cleanly to `computed` entries. - Migrating to v1 now creates a half-migration state that must be undone when v2 lands. - -**Recommended next slice:** -1. Add `"knowledge"` and `"graph"` to `ARTIFACT_KEYS` in `unit-context-manifest.js`. -2. Register them as `computed` entries in relevant `UNIT_MANIFESTS` entries. -3. Wire one builder (e.g., `buildResearchSlicePrompt`) through `composeUnitContext` v2 as pilot. -4. Add position-assertion tests to already-migrated complex builders (`research-milestone`, - `complete-slice`) to guard against silent ordering degradation. -5. 
Then migrate remaining builders in batches: slice builders → milestone builders → execute-task. - -**Note on `prompt-cache-optimizer.js`:** Entirely dead code — `optimizeForCaching()`, -`estimateCacheSavings()`, `computeCacheHitRate()` have zero importers. `reorderForCaching()` -is wired at `phases-unit.js:519` but no `cache_control` markers are written to outgoing -requests. Remove the file or wire it in the same slice that adds `cache_control` breakpoints. - ---- diff --git a/src/cli.ts b/src/cli.ts index c16e19385..a7a9ed1af 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -809,7 +809,7 @@ if (cliFlags.maintain) { await runGeminiCatalogRefreshIfStale(process.cwd()); await runOpenaiCodexCatalogRefreshIfStale(process.cwd()); await runProviderQuotaRefreshIfStale(process.cwd(), auth); - const prefs = loadEffectiveSFPreferences()?.preferences ?? {}; + const prefs = (loadEffectiveSFPreferences()?.preferences ?? {}) as Record; const coverage = computeBenchmarkCoverage(prefs); writeBenchmarkCoverage(coverage); const ms = Date.now() - startedAt; diff --git a/src/headless-usage.ts b/src/headless-usage.ts index dea7a2eff..79b2adcf6 100644 --- a/src/headless-usage.ts +++ b/src/headless-usage.ts @@ -104,7 +104,11 @@ export async function handleUsage( 16, ...windows.map((w) => (w.label ?? "").length), ); - for (const w of windows) { + for (const w of windows as Array<{ + label?: string; + usedFraction?: number; + resetHint?: string; + }>) { const pct = typeof w.usedFraction === "number" ? 
`${(w.usedFraction * 100).toFixed(1).padStart(5)}%` diff --git a/src/resources/extensions/sf/benchmark-coverage.d.ts b/src/resources/extensions/sf/benchmark-coverage.d.ts new file mode 100644 index 000000000..82d7eca42 --- /dev/null +++ b/src/resources/extensions/sf/benchmark-coverage.d.ts @@ -0,0 +1,26 @@ +export interface BenchmarkCoverageEntry { + provider: string; + id: string; +} + +export interface BenchmarkCoverageSummary { + total: number; + coveredCount: number; + uncoveredCount: number; + coverageRatio: number; +} + +export interface BenchmarkCoverageResult { + covered: BenchmarkCoverageEntry[]; + uncovered: BenchmarkCoverageEntry[]; + summary: BenchmarkCoverageSummary; +} + +export declare function normalizeForBenchmarkLookup(modelId: string): string; +export declare function computeBenchmarkCoverage(prefs: Record): BenchmarkCoverageResult; +export declare function writeBenchmarkCoverage(coverage: BenchmarkCoverageResult): void; +export declare function detectCoverageChange(coverage: BenchmarkCoverageResult): boolean; +export declare function scheduleBenchmarkCoverageAudit( + prefs: Record, + notify?: (message: string) => void, +): void; diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.js b/src/resources/extensions/sf/bootstrap/register-hooks.js index fe30e06a2..913df186b 100644 --- a/src/resources/extensions/sf/bootstrap/register-hooks.js +++ b/src/resources/extensions/sf/bootstrap/register-hooks.js @@ -550,7 +550,7 @@ export function registerHooks(pi, ecosystemHandlers = []) { const { loadEffectiveSFPreferences } = await import( "../preferences.js" ); - const prefs = loadEffectiveSFPreferences() ?? {}; + const prefs = loadEffectiveSFPreferences()?.preferences ?? 
{}; scheduleBenchmarkCoverageAudit(prefs, (msg) => ctx.ui?.notify?.(msg, "info", { noticeKind: NOTICE_KIND.SYSTEM_NOTICE, diff --git a/src/resources/extensions/sf/gemini-catalog.d.ts b/src/resources/extensions/sf/gemini-catalog.d.ts new file mode 100644 index 000000000..9e9e76654 --- /dev/null +++ b/src/resources/extensions/sf/gemini-catalog.d.ts @@ -0,0 +1,3 @@ +export declare function refreshGeminiCatalog(basePath: string): Promise; +export declare function runGeminiCatalogRefreshIfStale(basePath: string): Promise; +export declare function scheduleGeminiCatalogRefresh(basePath: string): void; diff --git a/src/resources/extensions/sf/model-catalog-cache.d.ts b/src/resources/extensions/sf/model-catalog-cache.d.ts index 5a8eca7bd..350364c0f 100644 --- a/src/resources/extensions/sf/model-catalog-cache.d.ts +++ b/src/resources/extensions/sf/model-catalog-cache.d.ts @@ -2,5 +2,6 @@ export declare function readCachedModelIds(basePath: string, providerId: string) export declare function getCachedModelIds(basePath: string, providerId: string): string[]; export declare function refreshProviderCatalog(basePath: string, providerId: string, apiKey: string): Promise; export declare function scheduleModelCatalogRefresh(basePath: string, auth: { getCredentialsForProvider: (id: string) => Array<{ type: string; key?: string }> }): void; +export declare function runModelCatalogRefreshIfStale(basePath: string, auth: { getCredentialsForProvider: (id: string) => Array<{ type: string; key?: string }> }): Promise; export declare function refreshSfManagedProviders(basePath: string, auth: { getCredentialsForProvider: (id: string) => Array<{ type: string; key?: string }> }): Promise; export declare function getKnownModelIds(basePath: string, providerId: string, sdkModelIds?: string[]): string[]; diff --git a/src/resources/extensions/sf/openai-codex-catalog.d.ts b/src/resources/extensions/sf/openai-codex-catalog.d.ts new file mode 100644 index 000000000..8ef765016 --- /dev/null +++ 
// ─── src/resources/extensions/sf/openai-codex-catalog.d.ts ──────────────────
// NOTE(review): the diff shows bare `Promise;` return types — the type
// arguments were lost in extraction. `unknown` is a placeholder that keeps the
// declarations valid; confirm the resolved types against the implementation.
export declare function readCodexAvailableModels(): Promise<unknown>;
export declare function refreshOpenaiCodexCatalog(basePath?: string): Promise<unknown>;
export declare function runOpenaiCodexCatalogRefreshIfStale(basePath?: string): Promise<unknown>;
export declare function scheduleOpenaiCodexCatalogRefresh(basePath?: string): void;

// ─── src/resources/extensions/sf/prompt-cache-optimizer.js ──────────────────
/**
 * Prompt Cache Optimizer — separates prompt content into cacheable static
 * prefixes and dynamic per-task suffixes to maximize provider cache hit rates.
 *
 * Anthropic caches by prefix match (up to 4 breakpoints, 90% savings).
 * OpenAI auto-caches prompts with 1024+ stable prefix tokens (50% savings).
 * Both benefit from placing static content first and dynamic content last.
 */
// ─── Label classification maps ───────────────────────────────────────────────
/** Labels that never change within a session */
const STATIC_LABELS = new Set([
  "system-prompt",
  "base-instructions",
  "executor-constraints",
]);
/** Prefix patterns for static labels (e.g. "template-*") */
const STATIC_PREFIXES = ["template-"];
/** Labels that change per-slice but not per-task */
const SEMI_STATIC_LABELS = new Set([
  "slice-plan",
  "decisions",
  "requirements",
  "roadmap",
  "prior-summaries",
  "project-context",
  "overrides",
  // KNOWLEDGE is milestone-scoped (stable within a session), so it belongs
  // in the cacheable prefix. See issue #4719.
  "knowledge",
  "project-knowledge",
]);
/** Labels that change per-task */
const DYNAMIC_LABELS = new Set([
  "task-plan",
  "task-instructions",
  "task-context",
  "file-contents",
  "diff-context",
  "verification-commands",
]);
/** Text inserted between adjacent sections when the prompt is assembled. */
const SECTION_SEPARATOR = "\n\n";
/** Recognized roles, in the order their groups are emitted (most cacheable first). */
const ROLE_ORDER = ["static", "semi-static", "dynamic"];
// ─── Public API ──────────────────────────────────────────────────────────────
/**
 * Classify common SF prompt sections by their caching potential.
 *
 * @param label Section label (e.g., "slice-plan", "task-instructions")
 * @returns "static", "semi-static" or "dynamic"
 */
export function classifySection(label) {
  if (STATIC_LABELS.has(label)) return "static";
  if (STATIC_PREFIXES.some((p) => label.startsWith(p))) return "static";
  if (SEMI_STATIC_LABELS.has(label)) return "semi-static";
  if (DYNAMIC_LABELS.has(label)) return "dynamic";
  // Conservative default: unknown labels are treated as dynamic
  return "dynamic";
}
/**
 * Build a PromptSection from content with automatic role classification.
 *
 * @param label   Section label (e.g., "slice-plan", "task-instructions")
 * @param content The section content
 * @param role    Optional explicit role override
 * @returns `{ label, content, role }`
 */
export function section(label, content, role) {
  return {
    label,
    content,
    role: role ?? classifySection(label),
  };
}
/**
 * Optimize prompt sections for maximum cache hit rates.
 * Reorders sections: static first, then semi-static, then dynamic.
 * Preserves relative order within each role group.
 *
 * A section whose `role` is missing or not one of the three recognized roles
 * is classified from its label instead of crashing the whole optimization.
 *
 * @param sections Array of labeled prompt sections
 * @returns Cache-optimized prompt with statistics
 */
export function optimizeForCaching(sections) {
  const groups = {
    static: [],
    "semi-static": [],
    dynamic: [],
  };
  for (const s of sections) {
    // `groups[s.role]` is undefined for an unrecognized role; fall back to the
    // label-based classification (which itself defaults to "dynamic").
    const role = ROLE_ORDER.includes(s.role) ? s.role : classifySection(s.label);
    groups[role].push(s);
  }
  const joinContents = (list) =>
    list.map((s) => s.content).join(SECTION_SEPARATOR);
  const prefixSections = [...groups["static"], ...groups["semi-static"]];
  const prompt = joinContents([...prefixSections, ...groups["dynamic"]]);
  // The cacheable prefix is exactly the joined static + semi-static text, so
  // its length already accounts for every separator inside the prefix.
  const cacheablePrefixChars = joinContents(prefixSections).length;
  const totalChars = prompt.length;
  const cacheEfficiency =
    totalChars > 0 ? cacheablePrefixChars / totalChars : 0;
  return {
    prompt,
    cacheablePrefixChars,
    totalChars,
    cacheEfficiency,
    sectionCounts: {
      static: groups["static"].length,
      "semi-static": groups["semi-static"].length,
      dynamic: groups["dynamic"].length,
    },
  };
}
/**
 * Estimate the cache savings for a given optimization result.
 * Based on provider pricing:
 * - Anthropic: 90% savings on cached tokens
 * - OpenAI: 50% savings on cached tokens
 *
 * @param result   The cache-optimized prompt
 * @param provider Provider name for savings calculation
 * @returns Estimated savings as a decimal (0.0-1.0); 0 for "other" and for any
 *          provider name that is not recognized
 */
export function estimateCacheSavings(result, provider) {
  switch (provider) {
    case "anthropic":
      return result.cacheEfficiency * 0.9;
    case "openai":
      return result.cacheEfficiency * 0.5;
    default:
      // Covers "other" and any unrecognized provider, so the function always
      // returns a number instead of falling off the switch.
      return 0;
  }
}
/**
 * Compute cache hit rate from token usage metrics.
 *
 * @param usage Object with numeric `cacheRead` and `input` token counts
 * @returns A percentage 0-100; 0 when the total is zero, negative or not a
 *          number (e.g. a counter is missing)
 */
export function computeCacheHitRate(usage) {
  const denominator = usage.cacheRead + usage.input;
  // `!(x > 0)` is also true for NaN, which a plain `=== 0` check lets through.
  if (!(denominator > 0)) return 0;
  return (usage.cacheRead / denominator) * 100;
}

// ─── src/resources/extensions/sf/provider-quota-cache.d.ts ──────────────────
export interface ProviderQuotaWindow {
  label: string;
  used: number;
  limit: number;
  usedFraction?: number;
  resetHint?: string;
}

export interface ProviderQuotaEntry {
  ok: boolean;
  fetchedAt: string;
  error?: string;
  windows: ProviderQuotaWindow[];
  // NOTE(review): type arguments were lost in extraction (`Record;`);
  // presumably the untouched vendor payload — confirm.
  raw?: Record<string, unknown>;
}

export interface AuthLike {
  getCredentialsForProvider(id: string): Array<{ type: string; key?: string }>;
}

export declare const QUOTA_CAPABLE_PROVIDER_IDS: readonly string[];
export declare function getProviderQuotaState(providerId: string): ProviderQuotaEntry | null;
// NOTE(review): return type arguments were lost in extraction (`Record;`);
// presumably keyed by provider id — confirm against the implementation.
export declare function getAllProviderQuotaEntries(): Record<string, ProviderQuotaEntry>;
// NOTE(review): `Promise;` in the diff — resolved type assumed to be void; confirm.
export declare function runProviderQuotaRefreshIfStale(basePath: string, auth: AuthLike): Promise<void>;
export declare function scheduleProviderQuotaRefresh(basePath: string, auth: AuthLike): void;
b/src/resources/extensions/sf/tests/provider-quota-cache.test.mjs new file mode 100644 index 000000000..80ba5c86a --- /dev/null +++ b/src/resources/extensions/sf/tests/provider-quota-cache.test.mjs @@ -0,0 +1,312 @@ +/** + * provider-quota-cache.test.mjs + * + * Tests that the quota fetcher loop: + * - Calls the right URL with Bearer auth per provider + * - Normalizes each vendor's JSON shape into the shared ProviderQuotaEntry + * - Writes to ~/.sf/provider-quota.json under the global SF_HOME + * - Honors TTL (no refetch when fresh) + * - Records per-provider errors without crashing the loop + */ +import assert from "node:assert/strict"; +import { + existsSync, + mkdtempSync, + readFileSync, + rmSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, test } from "vitest"; + +import { + getAllProviderQuotaEntries, + getProviderQuotaState, + runProviderQuotaRefreshIfStale, + QUOTA_CAPABLE_PROVIDER_IDS, +} from "../provider-quota-cache.js"; + +// ─── Test isolation ────────────────────────────────────────────────────────── + +const tmpDirs = []; +let originalSfHome; +let originalFetch; + +beforeEach(() => { + originalSfHome = process.env.SF_HOME; + originalFetch = globalThis.fetch; +}); + +afterEach(() => { + while (tmpDirs.length > 0) { + rmSync(tmpDirs.pop(), { recursive: true, force: true }); + } + if (originalSfHome === undefined) delete process.env.SF_HOME; + else process.env.SF_HOME = originalSfHome; + globalThis.fetch = originalFetch; +}); + +function tempSfHome() { + const dir = mkdtempSync(join(tmpdir(), "sf-quota-cache-test-")); + tmpDirs.push(dir); + process.env.SF_HOME = dir; + return dir; +} + +/** Minimal auth shim: returns the requested key for any provider id. */ +function makeAuth(keys) { + return { + getCredentialsForProvider(id) { + return keys[id] ? [{ type: "api_key", key: keys[id] }] : []; + }, + }; +} + +/** Stub fetch with a per-URL response map. 
Unmatched calls throw. */ +function stubFetch(responses) { + const calls = []; + globalThis.fetch = async (url, options = {}) => { + calls.push({ url: String(url), headers: options.headers ?? {} }); + const handler = responses[String(url)]; + if (!handler) throw new Error(`unexpected fetch: ${url}`); + const body = typeof handler === "function" ? handler() : handler; + return { + ok: true, + status: 200, + json: async () => body, + }; + }; + return calls; +} + +// ─── Module surface ────────────────────────────────────────────────────────── + +describe("QUOTA_CAPABLE_PROVIDER_IDS", () => { + test("lists the five providers with introspection endpoints", () => { + assert.deepEqual( + [...QUOTA_CAPABLE_PROVIDER_IDS].sort(), + ["google-gemini-cli", "kimi-coding", "minimax", "openrouter", "zai"].sort(), + ); + }); +}); + +// ─── kimi-coding ───────────────────────────────────────────────────────────── + +describe("runProviderQuotaRefreshIfStale — kimi-coding", () => { + test("hits /coding/v1/usages with Bearer auth and parses windows", async () => { + const home = tempSfHome(); + const calls = stubFetch({ + "https://api.kimi.com/coding/v1/usages": { + usage: { limit: 1000, used: 250, name: "Weekly" }, + limits: [ + { + detail: { limit: 200, used: 80, name: "5h" }, + window: { duration: 5, timeUnit: "hours" }, + }, + ], + }, + }); + + await runProviderQuotaRefreshIfStale(home, makeAuth({ "kimi-coding": "test-kimi" })); + + const kimiCall = calls.find((c) => c.url.includes("kimi.com")); + assert.ok(kimiCall, "should have called kimi.com"); + assert.equal(kimiCall.headers.Authorization, "Bearer test-kimi"); + + const entry = getProviderQuotaState("kimi-coding"); + assert.ok(entry, "kimi-coding entry should exist"); + assert.equal(entry.ok, true); + assert.equal(entry.windows.length, 2); + assert.equal(entry.windows[0].label, "Weekly"); + assert.equal(entry.windows[0].used, 250); + assert.equal(entry.windows[0].limit, 1000); + assert.equal(entry.windows[0].usedFraction, 
0.25); + assert.equal(entry.windows[1].label, "5h"); + assert.equal(entry.windows[1].usedFraction, 0.4); + }); + + test("falls back from `used` to `limit - remaining`", async () => { + const home = tempSfHome(); + stubFetch({ + "https://api.kimi.com/coding/v1/usages": { + usage: { limit: 1000, remaining: 600, name: "Weekly" }, + }, + }); + await runProviderQuotaRefreshIfStale(home, makeAuth({ "kimi-coding": "k" })); + const entry = getProviderQuotaState("kimi-coding"); + assert.equal(entry.windows[0].used, 400); + assert.equal(entry.windows[0].usedFraction, 0.4); + }); +}); + +// ─── openrouter ────────────────────────────────────────────────────────────── + +describe("runProviderQuotaRefreshIfStale — openrouter", () => { + test("hits /api/v1/credits with Bearer auth", async () => { + const home = tempSfHome(); + const calls = stubFetch({ + "https://openrouter.ai/api/v1/credits": { + data: { total_credits: 10, total_usage: 2.5 }, + }, + }); + + await runProviderQuotaRefreshIfStale( + home, + makeAuth({ openrouter: "test-or" }), + ); + + const orCall = calls.find((c) => c.url.includes("openrouter.ai")); + assert.ok(orCall); + assert.equal(orCall.headers.Authorization, "Bearer test-or"); + + const entry = getProviderQuotaState("openrouter"); + assert.equal(entry.ok, true); + assert.equal(entry.windows[0].used, 2.5); + assert.equal(entry.windows[0].limit, 10); + assert.equal(entry.windows[0].usedFraction, 0.25); + }); +}); + +// ─── minimax ───────────────────────────────────────────────────────────────── + +describe("runProviderQuotaRefreshIfStale — minimax", () => { + test("hits /v1/token_plan/remains and parses remaining_tokens / total_tokens", async () => { + const home = tempSfHome(); + stubFetch({ + "https://api.minimax.io/v1/token_plan/remains": { + remaining_tokens: 700, + total_tokens: 1000, + reset_time: "2026-05-17T00:00:00Z", + }, + }); + + await runProviderQuotaRefreshIfStale( + home, + makeAuth({ minimax: "test-mm" }), + ); + + const entry = 
getProviderQuotaState("minimax"); + assert.equal(entry.ok, true); + assert.equal(entry.windows[0].used, 300); + assert.equal(entry.windows[0].limit, 1000); + assert.equal(entry.windows[0].usedFraction, 0.3); + assert.equal(entry.windows[0].resetHint, "2026-05-17T00:00:00Z"); + }); +}); + +// ─── zai ───────────────────────────────────────────────────────────────────── + +describe("runProviderQuotaRefreshIfStale — zai", () => { + test("hits /api/monitor/usage/quota/limit and parses bucket array", async () => { + const home = tempSfHome(); + stubFetch({ + "https://api.z.ai/api/monitor/usage/quota/limit": { + data: [ + { name: "5h tokens", limit: 5000, used: 1500 }, + { name: "MCP monthly", limit: 100, used: 70 }, + ], + }, + }); + + await runProviderQuotaRefreshIfStale(home, makeAuth({ zai: "test-zai" })); + + const entry = getProviderQuotaState("zai"); + assert.equal(entry.ok, true); + assert.equal(entry.windows.length, 2); + assert.equal(entry.windows[0].label, "5h tokens"); + assert.equal(entry.windows[0].usedFraction, 0.3); + assert.equal(entry.windows[1].label, "MCP monthly"); + assert.equal(entry.windows[1].usedFraction, 0.7); + }); +}); + +// ─── TTL behavior ──────────────────────────────────────────────────────────── + +describe("TTL", () => { + test("second refresh within TTL is a no-op (does not re-fetch)", async () => { + const home = tempSfHome(); + const calls = stubFetch({ + "https://api.minimax.io/v1/token_plan/remains": { + remaining_tokens: 100, + total_tokens: 200, + }, + }); + + await runProviderQuotaRefreshIfStale(home, makeAuth({ minimax: "k" })); + assert.equal(calls.length, 1); + + await runProviderQuotaRefreshIfStale(home, makeAuth({ minimax: "k" })); + assert.equal( + calls.length, + 1, + "second refresh within TTL should reuse cache", + ); + }); + + test("getProviderQuotaState returns null when there is no entry", () => { + tempSfHome(); + assert.equal(getProviderQuotaState("kimi-coding"), null); + }); +}); + +// ─── Error handling 
────────────────────────────────────────────────────────── + +describe("error handling", () => { + test("missing API key is recorded as error, doesn't crash other providers", async () => { + const home = tempSfHome(); + stubFetch({ + "https://openrouter.ai/api/v1/credits": { + data: { total_credits: 5, total_usage: 1 }, + }, + }); + + // Only provide openrouter key; kimi/minimax/zai should record errors. + await runProviderQuotaRefreshIfStale( + home, + makeAuth({ openrouter: "or" }), + ); + + const all = getAllProviderQuotaEntries(); + assert.equal(all["openrouter"].ok, true); + assert.equal(all["kimi-coding"].ok, false); + assert.match(all["kimi-coding"].error, /no api key configured/); + assert.equal(all["minimax"].ok, false); + assert.equal(all["zai"].ok, false); + }); + + test("fetch failure recorded as error, doesn't crash loop", async () => { + const home = tempSfHome(); + globalThis.fetch = async () => { + throw new Error("network down"); + }; + await runProviderQuotaRefreshIfStale( + home, + makeAuth({ "kimi-coding": "k", openrouter: "o", minimax: "m", zai: "z" }), + ); + const all = getAllProviderQuotaEntries(); + for (const pid of ["kimi-coding", "openrouter", "minimax", "zai"]) { + assert.equal(all[pid].ok, false, `${pid} should be marked failed`); + assert.match(all[pid].error, /network down/); + } + }); +}); + +// ─── Cache file format ─────────────────────────────────────────────────────── + +describe("cache file", () => { + test("writes ~/.sf/provider-quota.json with schemaVersion 1", async () => { + const home = tempSfHome(); + stubFetch({ + "https://openrouter.ai/api/v1/credits": { + data: { total_credits: 5, total_usage: 1 }, + }, + }); + await runProviderQuotaRefreshIfStale(home, makeAuth({ openrouter: "k" })); + const path = join(home, "provider-quota.json"); + assert.ok(existsSync(path)); + const parsed = JSON.parse(readFileSync(path, "utf-8")); + assert.equal(parsed.schemaVersion, 1); + assert.ok(parsed.providers.openrouter); + 
assert.equal(parsed.providers.openrouter.ok, true); + }); +}); diff --git a/src/resources/extensions/sf/uok/agent-runner.js b/src/resources/extensions/sf/uok/agent-runner.js index 92272a8c1..6c1fa15f2 100644 --- a/src/resources/extensions/sf/uok/agent-runner.js +++ b/src/resources/extensions/sf/uok/agent-runner.js @@ -155,13 +155,20 @@ export async function runAgentTurn(agent, opts = {}) { permissionLevel, } = opts; + debugLog("agent-runner", { + event: "runAgentTurn-enter", + agentName: agent.identity?.name, + onlyMessageId: onlyMessageId ?? null, + }); // When onlyMessageId is set, force-refresh the inbox from SQLite so that // messages delivered via a different MessageBus instance (i.e. the // SwarmDispatchLayer's bus) are visible even within the 30s cache window. // This is the root cause of Bug 1: the agent's in-memory inbox is stale on // a second dispatch because INBOX_REFRESH_INTERVAL_MS has not elapsed. if (onlyMessageId) { + debugLog("agent-runner", { event: "before-inbox-refresh", onlyMessageId }); agent._inbox.refresh(); + debugLog("agent-runner", { event: "after-inbox-refresh", onlyMessageId }); } // When onlyMessageId is provided, isolate this message for surgical processing. diff --git a/todo.md b/todo.md deleted file mode 100644 index 508a097f0..000000000 --- a/todo.md +++ /dev/null @@ -1,51 +0,0 @@ -# TODO - -Unimplemented items consolidated from root *.md files. Source file noted for each item. 
- ---- - -## Critical / Correctness - -- [x] Port `fix(security): harden project-controlled surfaces` — env isolation + transport cleanup done; gsd-2 trust/dedup hunks (server.ts, mcp-client/index.ts) not applicable (packages absent) *(BUILD_PLAN.md Tier 0.5 #2)* -- [x] Port agent-session/agent-end transition fixes — `_sessionSwitchInFlight` guard + `sessionSwitchGeneration` pattern implemented in auto/resolve.js + run-unit.js *(BUILD_PLAN.md Tier 0.5 #7-10)* ---- - -## Architecture / Design Gaps - -- [x] Schema reconciliation: update SPEC.md to 3-table model (milestones/slices/tasks vs single `units`) *(BUILD_PLAN.md Tier 1.3)* -- [ ] Persistent agents v1 command surface — `/sf agent run|reset|delete|inspect` *(BUILD_PLAN.md Tier 2.1)* -- [ ] Intent chapters (`chapter_open`/`chapter_close` — crash-resume context) *(BUILD_PLAN.md Tier 2.3)* -- [ ] PhaseReview 3-pass review (establish-context → parallel chunked → synthesis) *(BUILD_PLAN.md Tier 2.4)* -- [x] `last_error` cap to 4 KB head+tail; full payload to file *(BUILD_PLAN.md Tier 2.6)* -- [x] Port workflow state machine hardening (gsd-2 `f2377eedd`, `b9a1c6743`, `153fb328a`, `381ccdef5`, `371b2eb31`) — Cluster F: 3 fail-open SUMMARY checks fixed in state.js + dispatch-guard.js *(BUILD_PLAN.md Tier 0.5 #13, UPSTREAM_CHERRY_PICK_CANDIDATES.md Cluster F)* -- [x] Port `fix(claude-code-cli): persist Always Allow for non-Bash tools` (gsd-2 `a88baeae9`) — already implemented; tests confirm *(BUILD_PLAN.md Tier 0.5 #11)* - ---- - -## Medium Priority / Quality - -- [x] Replace `isHeavyModelId()` name-matching heuristic with capability-based check *(PRODUCTION_AUDIT_GRADE.md #9, PRODUCTION_AUDIT.md 3.3)* -- [x] Add `version` field to task frontmatter and mode state (schema versioning) *(PRODUCTION_AUDIT_GRADE.md #8)* -- [ ] Integration tests for full remote steering pipeline *(PRODUCTION_AUDIT.md Long Term #10)* -- [x] Log `frontmatterErrors` in sf-db.js instead of silently dropping validation errors *(PRODUCTION_AUDIT.md 
3.1)* -- [x] Search provider registry refactor — consolidate provider list across files into `SearchProviderRegistry` *(BUILD_PLAN.md Tier 1+)* -- [x] Update ARCHITECTURE.md self-evolution section (triage pipeline IS active; injection IS automatic now) *(ARCHITECTURE.md)* -- [x] Add Mermaid state machine diagram to ARCHITECTURE.md — task lifecycle stateDiagram-v2 added *(ARCHITECTURE.md)* -- [ ] Symlinked packages/resources/skills/sessions dedup (pi-mono PR #3818) *(BUILD_PLAN.md Tier 0 #6)* - ---- - -## Long-term / Deferred - -- [ ] Singularity Knowledge + Agent Platform (Go re-platform, ~12 weeks) *(BUILD_PLAN.md Tier 1+)* -- [ ] sf-worker SSH host (Go, `wish` + `xpty`, ~3 weeks) *(BUILD_PLAN.md Tier 4)* -- [ ] Charm TUI client (`sf-tui` in Go, ~12-16 weeks) *(BUILD_PLAN.md Tier 1+)* -- [ ] Flight recorder (`x/vcr`, ~3 weeks) *(BUILD_PLAN.md Tier 1+)* -- [ ] Full swarm chat for `subagent` tool (Option C, depends on persistent-agent layer) *(BUILD_PLAN.md Tier 1+)* -- [ ] Caveman input-side prompt compression (rewrite execute-task/plan-slice prompts) *(BUILD_PLAN.md Tier 1+)* -- [ ] Runtime input preprocessor (`terse_prompts: true` dispatch transform, ~3-4 days) *(BUILD_PLAN.md Tier 1+)* -- [ ] Judge calibration + eval runner service (Go/Charm, ~2-3 weeks post SM) *(BUILD_PLAN.md Tier 1+)* -- [ ] M009 promote-only adoption review — create `sf schedule` entry (2 weeks after M009 close) *(BACKLOG.md)* -- [ ] Establish pi-mono SDK sync cadence (recurring check schedule) *(BUILD_PLAN.md Tier 1+)* -- [ ] `scripts/port-from-gsd2.sh` automation script *(UPSTREAM_PORT_GUIDE.md)* -- [ ] TypeScript migration for UOK modules (`kernel.js`, etc.) *(PRODUCTION_AUDIT_COMPLETE.md, PRODUCTION_AUDIT_GRADE.md)*