From b0fce94f9e7f3254e55f76c7404098d30542d516 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Thu, 7 May 2026 18:17:41 +0200 Subject: [PATCH] feat: record retrieval evidence across context tools --- .../2026-05-07-cli-agent-code-survey.md | 148 ++++++++++++++- docs/records/index.md | 2 +- docs/user-docs/providers.md | 17 +- gitbook/configuration/providers.md | 16 +- gitbook/reference/environment-variables.md | 4 +- packages/pi-ai/src/env-api-keys.test.ts | 9 +- packages/pi-ai/src/env-api-keys.ts | 8 +- .../pi-ai/src/web-runtime-env-api-keys.ts | 5 +- .../pi-coding-agent/src/core/auth-storage.ts | 25 ++- .../src/core/fallback-resolver.test.ts | 41 +---- .../src/core/fallback-resolver.ts | 88 ++------- .../core/model-registry-env-fallback.test.ts | 59 ++++++ .../src/core/model-registry.ts | 21 +++ .../pi-coding-agent/src/core/retry-handler.ts | 146 +++++++++------ .../src/core/settings-manager.ts | 4 +- .../interactive/components/model-selector.ts | 4 +- src/resources/extensions/context7/index.js | 121 +++++++++++++ .../extensions/search-the-web/tool-search.js | 103 +++++++++++ .../extensions/sf/bootstrap/query-tools.js | 56 ++++++ .../extensions/sf/doctor-providers.js | 5 +- .../extensions/sf/extension-manifest.json | 1 + .../extensions/sf/preferences-models.js | 17 +- .../extensions/sf/provider-env-auth.js | 73 ++++++++ .../extensions/sf/retrieval-evidence.js | 66 +++++++ src/resources/extensions/sf/sf-db.js | 113 +++++++++++- .../context7-retrieval-evidence.test.mjs | 124 +++++++++++++ .../sf/tests/doctor-providers.test.mjs | 57 ++++++ .../sf/tests/preferences-models.test.mjs | 93 ++++++++++ .../sf/tests/sf-db-migration.test.mjs | 38 +++- .../sf/tests/sift-retrieval-evidence.test.mjs | 168 ++++++++++++++++++ .../web-search-retrieval-evidence.test.mjs | 136 ++++++++++++++ .../extensions/sf/tools/sift-search-tool.js | 53 +++++- src/resources/extensions/subagent/index.js | 64 +++++++ 33 files changed, 1661 insertions(+), 224 deletions(-) create mode 100644 
src/resources/extensions/sf/provider-env-auth.js create mode 100644 src/resources/extensions/sf/retrieval-evidence.js create mode 100644 src/resources/extensions/sf/tests/context7-retrieval-evidence.test.mjs create mode 100644 src/resources/extensions/sf/tests/preferences-models.test.mjs create mode 100644 src/resources/extensions/sf/tests/sift-retrieval-evidence.test.mjs create mode 100644 src/resources/extensions/sf/tests/web-search-retrieval-evidence.test.mjs diff --git a/docs/records/2026-05-07-cli-agent-code-survey.md b/docs/records/2026-05-07-cli-agent-code-survey.md index 6c174eb57..6d2162044 100644 --- a/docs/records/2026-05-07-cli-agent-code-survey.md +++ b/docs/records/2026-05-07-cli-agent-code-survey.md @@ -1,14 +1,16 @@ -# CLI Agent Code Survey — 2026-05-07 +# SF + ACE Full-Stack Reference Survey — 2026-05-07 -This record compares the local coding-agent checkouts under `/home/mhugo/code/` -against Forge. It is planning evidence, not an instruction to copy another -product's architecture. +This record compares local coding-agent, orchestration, retrieval, model, and +platform-engineering references under `/home/mhugo/code/` plus selected indexed +public references against the intended SF+ACE full-stack flow. It is planning +evidence, not an instruction to copy another product's architecture. ## Product Boundary -Forge remains the product, and UOK remains the internal execution kernel. -External CLIs are reference implementations used to sharpen Forge, not -destination architectures. +Forge remains the local product/runtime surface, ACE remains the higher-level +workflow/control-plane layer, and UOK remains the internal execution safety +kernel. External systems are reference implementations used to sharpen the +unified SF+ACE flow, not destination architectures. Hard boundary: Forge must stay an MCP client only. Do not add, restore, or plan an SF MCP server. 
External control belongs in daemon, RPC, and headless @@ -38,10 +40,50 @@ Additional coder references: - `open-codex` - `letta-code` - `neovate-code` +- `symphony` +- `singularity/machine` (`codemachine`) + +Indexed-only references to include in future passes: + +- `kimi-cli` / Kimi Code +- Spec Kit +- upstream CodeMachine CLI (`moazbuilds/CodeMachine-CLI`) The local `claude-code` checkout is a leaked-source/sourcemap research mirror, not a clean upstream dependency. Treat it as ergonomics evidence only. +## SF + ACE Full-Stack Reference Map + +The long-term target is a unified SF+ACE autonomous software flow, not a +collection of unrelated coding assistants. Compare each repo at the layer where +it is strongest. + +| Repo / Tool | Full-Stack Layer | Pattern To Study | Evidence Mode | Safe `sift` Scope | +|---|---|---|---|---| +| `singularity-forge` | Local product/runtime | UOK, DB-first state, CLI/TUI/headless, extension tools, MCP-client-only guardrails | local source + `sift` | `docs/`, `src/resources/extensions/sf/`, `packages/*/src/`, tests | +| `ace-coder` | Workflow/control plane | HTDAG/YAML workflow DAGs, reviewers, quality gates, deployment governance, multi-repo memory | local source + `sift` only | `AGENTS.md`, `CLAUDE.md`, `docs/`, `.agents/skills/`, `python/ai_dev/` first-party modules | +| `symphony` | Work orchestration | Linear polling, isolated per-issue workspaces, `WORKFLOW.md`, Codex app-server, retries, PR review/landing | local source + Context7 `/openai/symphony` | `README.md`, `SPEC.md`, `elixir/WORKFLOW.md`, `elixir/AGENTS.md`, `.codex/skills/` | +| `codemachine` | Multi-agent workflow engine | Engine matrix, SmartRouter, spec-to-code workflow templates, feature flags, tool health | local fork/source + web upstream | `README.md`, `docs/architecture/`, `templates/workflows/`, `prompts/agents/`, `prompts/moderator/` | +| Amplication | Platform/golden paths | Live templates, service catalog, plugin codegen, generated service lifecycle, 
compliance/drift | web/GitHub; clone before local planning | `docs/`, `packages/*/src/`, plugin/codegen packages if cloned | +| Spec Kit | Spec-driven artifacts | Constitution, scenarios, FR/SC IDs, spec -> plan -> tasks -> analyze -> implement | Context7 `/github/spec-kit` | templates/docs/spec workflows if cloned | +| `plandex` | Large-task implementation | Cumulative diff sandbox, plan versioning, context loading, apply/debug loop | local source + Context7 | `README.md`, `app/cli/lib/`, `app/server/db/`, first-party docs | +| `aider` | Edit loop/context map | Repo-map ranking, edit formats, lint/test repair, benchmark metadata | local source + Context7 | `aider/`, `benchmark/`, `tests/`, docs; avoid generated website data unless needed | +| `Agentless` | Bug repair/evals | Localization -> repair -> patch validation, reproduction tests, reranking | local source | `agentless/fl/`, `agentless/repair/`, `agentless/test/`, benchmark docs | +| SWE-agent/OpenHands | Bug repair/runtime research | issue-to-patch loops, sandbox/runtime harnesses, SWE-bench evaluation | Context7/web or local clone if added | source/docs/evals only when cloned | +| `codex` | Execution substrate | Sandbox profiles, approval policy, app-server protocol, typed events, AGENTS scope | local source + Context7 `/openai/codex` | `docs/`, `codex-rs/protocol/src/`, `codex-rs/exec/src/`, `codex-rs/linux-sandbox/`; avoid `vendor/` | +| `claude-code` | UX reference | Permissions, commands, plugins, MCP client UX, subagent UX | local source only; leaked mirror caveat | `src/commands/`, `src/services/mcp/`, `src/tools/`, `src/components/` | +| `qwen-code` | Terminal workflow | trusted folders, subagent fork design, terminal-capture tests, provider config | local source + Context7 | `docs/`, `packages/*/src/`, `integration-tests/terminal-capture/` | +| Kimi Code | Model-specific coding agent | long-context coding, Kimi CLI/IDE flow, model-plan comparison | Context7 `/moonshotai/kimi-cli` | docs/source if 
cloned | +| CodeGeeX2 | Model capability | multilingual code model, HumanEval-X/DS1000, local deployment/quantization | web/GitHub | benchmark/evaluation/docs if cloned | +| `gemini-cli` | Provider CLI/testing | release channels, generated schemas/docs, eval promotion, perf/memory tests | local source + Context7 if needed | `docs/`, `evals/`, `perf-tests/`, `memory-tests/`, `packages/*/src/` | +| `opencode` | Mode/schema boundary | plan/build modes, client/server, project-local commands/tools, canonical schema | local source + Context7 | `README.md`, `.opencode/`, `specs/`, `packages/opencode/specs/`, `packages/opencode/src/` | +| `crush` | Local runtime/TUI | SQLite/sqlc, hooks, permissions, LSP, MCP client status, Bubble Tea UI | local source | `internal/db/`, `internal/hooks/`, `internal/permission/`, `internal/agent/tools/`, `internal/ui/` | +| `goose` | Desktop/CLI/API agent | diagnostics, API embedding, provider/extension breadth, MCP client lifecycle | local source | `crates/`, `documentation/`, `ui/desktop/`; do not copy server posture | +| `letta-code` | Long-lived memory | persistent agent memory, approval recovery, skills, channel/remote UX | local source | `src/agent/`, `src/permissions/`, `src/cli/`, `src/tests/` | +| `OpenAgents` | Full-stack multi-agent platform | backend/frontend/agent split, one-agent-one-folder, plugin/data/web agents, adapters | web/GitHub; clone before local planning | `backend/`, `frontend/`, `real_agents/` if cloned | +| Claude Context / Context+ | Code context retrieval | vector-backed semantic code search, MCP-client integration, context cost reduction | Context7/web | code search/indexing packages if cloned | +| `amazon-q-developer-cli` | Rust auth/security | auth, security, workspace patterns, Rust CLI lessons | local source; lower priority | `crates/chat-cli/`, `crates/agent/`, docs | + ## Comparison Matrix | Reference | Strongest Fit For Forge | Borrow | Avoid | @@ -62,6 +104,10 @@ not a clean upstream dependency. 
Treat it as ergonomics evidence only. | `neovate-code` | Design-doc and terminal UX iteration | Small design records, queued-message designs, subagent design notes, command/terminal UX records | Pulling in provider-specific branding or immature UX churn | | `amazon-q-developer-cli` | Rust auth/security reference | Auth/security/workspace patterns and Rust CLI lessons where applicable | Product direction; local README says the open source project is no longer actively maintained | | `open-codex` | Older/forked approval-mode comparison | Approval-mode vocabulary and provider abstraction history | Fork-specific Chat Completions direction as a primary architecture | +| `symphony` | Work orchestration above individual agents | Issue-tracker polling, per-issue isolated workspaces, repo-owned `WORKFLOW.md`, Codex app-server lifecycle, retries, operator state, CI/PR review and landing loops | High-trust unattended defaults without Forge's UOK gates and DB-first runtime evidence | +| `codemachine` | Multi-agent spec-to-code orchestration | Engine matrix, SmartRouter routing, heterogeneous agents, spec-to-code templates, feature flags, tool health, local workflow examples, upstream repeatable long-running workflow model | Optional MCP-server/tooling posture and Bun-specific implementation assumptions | +| Kimi Code | Long-context model-specific coding agent | Kimi CLI/IDE workflow, long-context coding, subagent-oriented terminal automation, model-plan comparison | Treating provider-specific subscription/API behavior as a Forge architecture | +| Spec Kit | Spec-driven development workflow | Constitution, prioritized user scenarios, acceptance criteria, functional requirements, measurable success criteria, spec -> plan -> tasks -> implement -> analyze loop | Replacing Forge PDD/UOK with a generic spec template instead of mapping useful pieces into PDD fields | ## Forge Already Has @@ -136,6 +182,20 @@ surfaces instead of adding parallel state systems. 
- Stop rule: do not implement any SF MCP server, MCP worker backend, or bundled/re-exported MCP server. +10. **Work orchestration above single agent sessions** + - Use OpenAI Symphony and CodeMachine as references. + - Target Forge surfaces: durable queue/roadmap dispatch, isolated working + directories, issue/task lifecycle state, retry/backoff, per-run + observability, proof-of-work handoff, and CI/PR review/landing loops. + - Stop rule: orchestration must feed UOK and DB-backed state instead of + bypassing Forge's safety gates. + +11. **Spec-driven artifact pipeline** + - Use Spec Kit and CodeMachine as references. + - Target Forge surfaces: convert intent into PDD fields, prioritized slices, + acceptance criteria, functional requirements, measurable success criteria, + task generation, and consistency analysis before implementation. + ## Priority Order P0: @@ -155,12 +215,16 @@ P1: - Add cumulative diff review and evidence metadata. - Expand UOK evals with Agentless-style localization/repair/validation cases. - Add MCP client state/status/config hardening without adding any MCP server. +- Add durable orchestration contracts for issue/task queues, isolated workspaces, + retry policy, proof-of-work, and review/landing loops. P2: - Improve terminal command discovery and permission UX. - Generate settings/environment docs from typed schemas. - Compare memory lifecycle/recovery against Letta and ACE. +- Map Spec Kit scenario/requirement/success-criteria templates into Forge PDD + fields without replacing PDD. ## Evidence Pointers @@ -201,6 +265,10 @@ The follow-up subagent pass inspected these concrete local paths: - `ace-coder/docs/MCP_SERVER.md`, `ace-coder/docs/plans/2026-04-05-mcp-daemon-refactor.md`, `ace-coder/python/ai_dev/mcp/`. +- `symphony/README.md`, `symphony/SPEC.md`, `symphony/elixir/WORKFLOW.md`, + `symphony/elixir/AGENTS.md`, and `.codex/skills/land/SKILL.md`. 
+- `singularity/machine/README.md`, `package.json`, `templates/workflows/`, + `docs/architecture/engine-matrix.md`, and `docs/OPENAI_SPECS_DOWNLOAD.md`. ## Context7 Cross-Check @@ -226,14 +294,58 @@ snapshot available on this machine. `/websites/qwenlm_github_io_qwen-code-docs`, and `/websites/qwenlm_github_io_qwen-code-docs_en`. - OpenCode: `/anomalyco/opencode`. + - OpenAI Symphony: `/openai/symphony`. + - Kimi Code: `/moonshotai/kimi-cli`, + `/websites/moonshotai_github_io_kimi-cli_en`, and `/websites/kimi_code`. + - Spec Kit: `/github/spec-kit` and `/websites/github_github_io_spec-kit`. + - Upstream CodeMachine CLI did not resolve by name in Context7 during this + pass, but GitHub confirms `https://github.com/moazbuilds/CodeMachine-CLI` + as the public upstream-style repo for CodeMachine CLI. The local checkout + inspected is `https://github.com/singularity-ng/machine.git`, so treat it as + local fork/mirror evidence rather than exact upstream state. ## Local Sift Cross-Check ACE is private/local and should not be treated as Context7-indexed. Use `sift` for ACE and Forge when checking private or machine-local architecture. +For dependency hygiene, do not run broad `sift search` over repo roots that may +contain vendored dependencies, package caches, build output, or generated blobs. +This `sift` install does not expose an exclude flag, so scope searches to +first-party paths such as `docs/`, `src/`, `packages/*/src/`, `specs/`, +`AGENTS.md`, `CLAUDE.md`, and known design files. Avoid `node_modules/`, +`vendor/`, `dist/`, `build/`, `target/`, `.venv/`, caches, fixture dumps, and +generated lock/schema/output directories unless the dependency surface itself is +the subject of the question. + The targeted `sift` pass found: +- Codex `codex-rs/protocol/src/config_types.rs` and `protocol.rs`: confirms + first-party typed approval policy and sandbox mode surfaces without searching + `codex-rs/vendor/`. 
+- OpenCode `packages/opencode/specs/effect/schema.md`: confirms the + schema-first rule to prefer one canonical schema definition and derive + compatibility schemas instead of maintaining parallel sources of truth. +- Aider first-party docs/tests: confirms local repo-map/edit-format/lint/test + and commit behavior surfaces. +- Plandex `README.md`, changelog, and first-party app model files: confirms the + cumulative diff sandbox, controlled command execution, rollback/debug loop, + and planning phases. +- Qwen Code `docs/`: confirms terminal-capture integration tests, trusted + folders documentation, and provider configuration docs. +- RA.Aid first-party docs/source: confirms shell command approval bypass via + `--cowboy-mode`, research/planning agents, and session/logging surfaces. +- Symphony first-party spec/workflow files: confirm issue-tracker polling, + per-issue workspace isolation, repo-owned `WORKFLOW.md`, Codex app-server + lifecycle, max turns/concurrency, retry/backoff, state snapshots, token/rate + observability, PR feedback sweeps, and land-loop skills. +- CodeMachine first-party docs/templates: confirm local multi-agent + orchestration, heterogeneous engine routing, spec-to-code workflow templates, + feature-flag governance, health/status commands, and optional MCP tooling. + GitHub upstream `moazbuilds/CodeMachine-CLI` confirms the public product + framing: repeatable long-running workflows, multi-agent orchestration, + parallel execution, context engineering, and headless scripting of coding + engines such as Claude Code, Codex, Cursor, and others. - ACE `AGENTS.md`: confirms the repo-local Claude MCP client contract, hard stops, skills, reviewer workflow, quality gate, and the warning that ACE's autonomous system uses its own code/YAML workflow DAGs rather than @@ -249,6 +361,28 @@ The targeted `sift` pass found: - Forge `docs/records/2026-05-07-cli-agent-code-survey.md`: now records the MCP-client-only product boundary and roadmap pull-through. 
+## Implementation Follow-Up + +The first DB-backed retrieval slice landed with schema v41: + +- `retrieval_evidence` records backend, source kind, query, strategy, scope, + project root, git head/branch, worktree dirty flag, freshness, status, hit + count, elapsed time, cache path, error, result metadata, and timestamp. +- `sift_search` and `codebase_search` write retrieval evidence for successful + and failed searches. +- Native Context7 `resolve_library` and `get_library_docs` write docs retrieval + evidence with `freshness=external-index`. +- `search-the-web` writes web retrieval evidence with `freshness=external-live` + for success, cache hits, missing-provider errors, duplicate-loop stops, + budget exhaustion, aborts, and provider failures. +- `sf_retrieval_evidence` exposes the rows through the SF read-only DB tool + surface so agents do not query `.sf/sf.db` directly. +- Sift telemetry now uses the no-op debug logger; telemetry failures no longer + turn successful searches into failed tool calls. + +Next slices should wrap `search_and_read` and `fetch_page` results in the same +evidence contract before using them for planning. 
+ ## Resulting Direction Forge should absorb proven patterns into UOK and the existing DB-first runtime: diff --git a/docs/records/index.md b/docs/records/index.md index 655e00375..96e5139b9 100644 --- a/docs/records/index.md +++ b/docs/records/index.md @@ -7,5 +7,5 @@ Repo-memory audits, decision ledgers, context-gardening notes, and records-keepe | Date | Note | Summary | |------|------|---------| | 2026-05-01 | [repo-vcs and notifications](./2026-05-01-repo-vcs-and-notifications.md) | repo-vcs skill landed; notification specs drafted; JSDoc annotations added; placeholder docs filled | -| 2026-05-07 | [cli agent code survey](./2026-05-07-cli-agent-code-survey.md) | compared local CLI agent checkouts plus Context7 cross-checks; priority pulls are execution permissions, typed headless events, DB-first state, trust gating, cumulative diffs, eval pipelines, and MCP-client-only lifecycle hardening | +| 2026-05-07 | [SF + ACE full-stack reference survey](./2026-05-07-cli-agent-code-survey.md) | repo-wise map of coding agents, orchestration systems, retrieval tools, model references, and platform/golden-path systems; priority pulls are execution permissions, typed headless events, DB-first state, trust gating, orchestration, cumulative diffs, eval pipelines, and MCP-client-only lifecycle hardening | | 2026-05-07 | [strategy alignment](./2026-05-07-strategy-alignment.md) | aligned top-level docs and roadmap framing around Forge as product, UOK as kernel, and external CLIs as sharpening inputs | diff --git a/docs/user-docs/providers.md b/docs/user-docs/providers.md index 64ecf2c8c..e49f3b7e1 100644 --- a/docs/user-docs/providers.md +++ b/docs/user-docs/providers.md @@ -32,7 +32,7 @@ Step-by-step setup instructions for every LLM provider SF supports. 
If you ran t |----------|-------------|-------------|-------------| | Anthropic | API key | `ANTHROPIC_API_KEY` | — | | OpenAI | API key | `OPENAI_API_KEY` | — | -| Google Gemini | Gemini CLI Core auth | — | `~/.gemini/oauth_creds.json` | +| Google Gemini | Gemini CLI Core auth (default) or API key when enabled in config | `GEMINI_API_KEY` | `~/.gemini/oauth_creds.json` | | OpenRouter | API key | `OPENROUTER_API_KEY` | Optional `models.json` | | Groq | API key | `GROQ_API_KEY` | — | | xAI | API key | `XAI_API_KEY` | — | @@ -92,8 +92,19 @@ Authenticate there once and let SF reuse the stored auth state. gemini login ``` -SF intentionally ignores ambient `GEMINI_API_KEY` and -`GOOGLE_GENERATIVE_AI_API_KEY` values for Forge runtime selection. +By default, Forge ignores ambient `GEMINI_API_KEY` and +`GOOGLE_GENERATIVE_AI_API_KEY` values for runtime selection. To allow env auth +for the direct `google` provider, set it in config: + +```json +{ + "providerEnvAuth": { + "providers": { + "google": "on" + } + } +} +``` ### OpenRouter diff --git a/gitbook/configuration/providers.md b/gitbook/configuration/providers.md index 3e99f5563..f92ec8c87 100644 --- a/gitbook/configuration/providers.md +++ b/gitbook/configuration/providers.md @@ -8,7 +8,7 @@ Step-by-step setup instructions for every LLM provider SF supports. If you ran t |----------|-------------|---------------------| | Anthropic | OAuth or API key | `ANTHROPIC_API_KEY` | | OpenAI | API key | `OPENAI_API_KEY` | -| Google Gemini | Gemini CLI Core auth | `~/.gemini/oauth_creds.json` | +| Google Gemini | Gemini CLI Core auth (default) or API key when enabled in config | `GEMINI_API_KEY` | | OpenRouter | API key | `OPENROUTER_API_KEY` | | Groq | API key | `GROQ_API_KEY` | | xAI (Grok) | API key | `XAI_API_KEY` | @@ -58,8 +58,18 @@ Authenticate Gemini CLI Core once and let SF reuse that state: gemini login ``` -SF intentionally ignores `GEMINI_API_KEY` and `GOOGLE_GENERATIVE_AI_API_KEY` -for Forge runtime selection. 
+Forge ignores `GEMINI_API_KEY` and `GOOGLE_GENERATIVE_AI_API_KEY` by default. +To let the direct `google` provider use env auth, enable it in config: + +```json +{ + "providerEnvAuth": { + "providers": { + "google": "on" + } + } +} +``` ### OpenRouter diff --git a/gitbook/reference/environment-variables.md b/gitbook/reference/environment-variables.md index 5e3e80eb1..704f9803b 100644 --- a/gitbook/reference/environment-variables.md +++ b/gitbook/reference/environment-variables.md @@ -18,8 +18,8 @@ |----------|----------| | `ANTHROPIC_API_KEY` | Anthropic (Claude) | | `OPENAI_API_KEY` | OpenAI | -| `GEMINI_API_KEY` | Google Gemini (ignored by Forge runtime; Gemini CLI Core auth is used instead) | -| `GOOGLE_GENERATIVE_AI_API_KEY` | Google Gemini alias (ignored by Forge runtime) | +| `GEMINI_API_KEY` | Google Gemini (available to the direct `google` provider, but disabled by default via `providerEnvAuth`) | +| `GOOGLE_GENERATIVE_AI_API_KEY` | Google Gemini alias (same policy as `GEMINI_API_KEY`) | | `OPENROUTER_API_KEY` | OpenRouter | | `GROQ_API_KEY` | Groq | | `XAI_API_KEY` | xAI (Grok) | diff --git a/packages/pi-ai/src/env-api-keys.test.ts b/packages/pi-ai/src/env-api-keys.test.ts index ff1908f9b..707e1f2aa 100644 --- a/packages/pi-ai/src/env-api-keys.test.ts +++ b/packages/pi-ai/src/env-api-keys.test.ts @@ -3,7 +3,7 @@ import { describe, it } from "vitest"; import { getEnvApiKey } from "./env-api-keys.js"; describe("getEnvApiKey", () => { - it("ignores GEMINI_API_KEY for google when present", () => { + it("uses GEMINI_API_KEY for google when present", () => { const savedGemini = process.env.GEMINI_API_KEY; const savedGoogleGenerative = process.env.GOOGLE_GENERATIVE_AI_API_KEY; @@ -11,8 +11,7 @@ describe("getEnvApiKey", () => { process.env.GOOGLE_GENERATIVE_AI_API_KEY = "google-generative-key"; try { - assert.equal(getEnvApiKey("google"), undefined); - assert.equal(getEnvApiKey("google-gemini-cli"), undefined); + assert.equal(getEnvApiKey("google"), 
"gemini-key"); } finally { if (savedGemini === undefined) delete process.env.GEMINI_API_KEY; else process.env.GEMINI_API_KEY = savedGemini; @@ -22,7 +21,7 @@ describe("getEnvApiKey", () => { } }); - it("ignores GOOGLE_GENERATIVE_AI_API_KEY for google", () => { + it("accepts GOOGLE_GENERATIVE_AI_API_KEY for google", () => { const savedGemini = process.env.GEMINI_API_KEY; const savedGoogleGenerative = process.env.GOOGLE_GENERATIVE_AI_API_KEY; @@ -30,7 +29,7 @@ describe("getEnvApiKey", () => { process.env.GOOGLE_GENERATIVE_AI_API_KEY = "google-generative-key"; try { - assert.equal(getEnvApiKey("google"), undefined); + assert.equal(getEnvApiKey("google"), "google-generative-key"); } finally { if (savedGemini === undefined) delete process.env.GEMINI_API_KEY; else process.env.GEMINI_API_KEY = savedGemini; diff --git a/packages/pi-ai/src/env-api-keys.ts b/packages/pi-ai/src/env-api-keys.ts index 2a0af2c70..a016211fd 100644 --- a/packages/pi-ai/src/env-api-keys.ts +++ b/packages/pi-ai/src/env-api-keys.ts @@ -73,13 +73,6 @@ function hasVertexAdcCredentials(): boolean { export function getEnvApiKey(provider: KnownProvider): string | undefined; export function getEnvApiKey(provider: string): string | undefined; export function getEnvApiKey(provider: any): string | undefined { - // Forge routes Gemini-family models through google-gemini-cli, which owns - // auth via Gemini CLI Core state. Intentionally ignore Google API-key env vars - // here so ambient GEMINI_API_KEY values do not change provider selection. 
- if (provider === "google" || provider === "google-gemini-cli") { - return undefined; - } - // Fall back to environment variables if (provider === "github-copilot") { return ( @@ -161,6 +154,7 @@ export function getEnvApiKey(provider: any): string | undefined { const envMap: Record = { openai: "OPENAI_API_KEY", "azure-openai-responses": "AZURE_OPENAI_API_KEY", + google: ["GEMINI_API_KEY", "GOOGLE_GENERATIVE_AI_API_KEY"], groq: "GROQ_API_KEY", cerebras: "CEREBRAS_API_KEY", xai: "XAI_API_KEY", diff --git a/packages/pi-ai/src/web-runtime-env-api-keys.ts b/packages/pi-ai/src/web-runtime-env-api-keys.ts index 75e90d12f..950f848ec 100644 --- a/packages/pi-ai/src/web-runtime-env-api-keys.ts +++ b/packages/pi-ai/src/web-runtime-env-api-keys.ts @@ -37,10 +37,6 @@ function hasVertexAdcCredentials(): boolean { export function getEnvApiKey(provider: KnownProvider): string | undefined; export function getEnvApiKey(provider: string): string | undefined; export function getEnvApiKey(provider: string): string | undefined { - if (provider === "google" || provider === "google-gemini-cli") { - return undefined; - } - if (provider === "github-copilot") { return ( process.env.COPILOT_GITHUB_TOKEN || @@ -93,6 +89,7 @@ export function getEnvApiKey(provider: string): string | undefined { const envMap: Record = { openai: "OPENAI_API_KEY", "azure-openai-responses": "AZURE_OPENAI_API_KEY", + google: ["GEMINI_API_KEY", "GOOGLE_GENERATIVE_AI_API_KEY"], groq: "GROQ_API_KEY", cerebras: "CEREBRAS_API_KEY", xai: "XAI_API_KEY", diff --git a/packages/pi-coding-agent/src/core/auth-storage.ts b/packages/pi-coding-agent/src/core/auth-storage.ts index 841b4c198..3253e88e3 100644 --- a/packages/pi-coding-agent/src/core/auth-storage.ts +++ b/packages/pi-coding-agent/src/core/auth-storage.ts @@ -32,6 +32,7 @@ import { getAgentDir } from "../config.js"; import { AUTH_LOCK_STALE_MS } from "./constants.js"; import { acquireLockAsync, acquireLockSyncWithRetry } from "./lock-utils.js"; import { 
resolveConfigValueAsync } from "./resolve-config-value.js"; +import type { ProviderEnvAuthMode } from "./settings-manager.js"; export type ApiKeyCredential = { type: "api_key"; @@ -266,6 +267,7 @@ export class AuthStorage { private data: AuthStorageData = {}; private runtimeOverrides: Map = new Map(); private fallbackResolver?: (provider: string) => string | undefined; + private envAuthModeResolver?: (provider: string) => ProviderEnvAuthMode; private loadError: Error | null = null; private errors: Error[] = []; private credentialChangeListeners: Set<() => void> = new Set(); @@ -337,6 +339,12 @@ export class AuthStorage { this.fallbackResolver = resolver; } + setEnvAuthModeResolver( + resolver: (provider: string) => ProviderEnvAuthMode, + ): void { + this.envAuthModeResolver = resolver; + } + /** * Register a callback to be notified when credentials change (e.g., after OAuth token refresh). * Returns a function to unregister the listener. @@ -500,7 +508,7 @@ export class AuthStorage { hasAuth(provider: string): boolean { if (this.runtimeOverrides.has(provider)) return true; if (this.data[provider]) return true; - if (getEnvApiKey(provider)) return true; + if (this.getConfiguredEnvApiKey(provider)) return true; if (this.fallbackResolver?.(provider)) return true; return false; } @@ -982,9 +990,8 @@ export class AuthStorage { // All credentials backed off or unresolvable - fall through to env/fallback } - // Fall back to environment variable. Gemini-family providers intentionally - // ignore ambient GEMINI_API_KEY values via getEnvApiKey(). - const envKey = getEnvApiKey(providerId); + // Fall back to environment variable when provider policy allows it. + const envKey = this.getConfiguredEnvApiKey(providerId); if (envKey) { // Block Google OAuth tokens from environment variables (e.g., GEMINI_API_KEY=ya29.*) if ( @@ -1007,6 +1014,16 @@ export class AuthStorage { return this.fallbackResolver?.(providerId) ?? 
undefined; } + private getConfiguredEnvApiKey(provider: string): string | undefined { + const mode = + this.envAuthModeResolver?.(provider) ?? + (provider === "google" || provider === "google-gemini-cli" + ? "off" + : "auto"); + if (mode === "off") return undefined; + return getEnvApiKey(provider); + } + /** * Get all registered OAuth providers */ diff --git a/packages/pi-coding-agent/src/core/fallback-resolver.test.ts b/packages/pi-coding-agent/src/core/fallback-resolver.test.ts index c0499381c..d6fdb8da3 100644 --- a/packages/pi-coding-agent/src/core/fallback-resolver.test.ts +++ b/packages/pi-coding-agent/src/core/fallback-resolver.test.ts @@ -82,14 +82,13 @@ function createResolver(overrides?: { // ─── findFallback ──────────────────────────────────────────────────────────── describe("FallbackResolver — findFallback", () => { - it("returns next available provider when current fails", async () => { + it("reselects from the current available models when current fails", async () => { const { resolver } = createResolver(); const result = await resolver.findFallback(zaiModel, "quota_exhausted"); assert.notEqual(result, null); assert.equal(result!.model.provider, "alibaba"); - assert.equal(result!.model.id, "glm-5"); - assert.equal(result!.chainName, "coding"); + assert.equal(result!.chainName, "fresh-selection"); }); it("marks current provider as exhausted for rate_limit errors", async () => { @@ -142,12 +141,12 @@ describe("FallbackResolver — findFallback", () => { assert.equal(result, null); }); - it("falls back to free selection when model is not in any chain", async () => { + it("reselects from scratch when model is not in any chain", async () => { const { resolver } = createResolver(); const unknownModel = createMockModel("unknown", "some-model"); const result = await resolver.findFallback(unknownModel, "quota_exhausted"); assert.notEqual(result, null); - assert.equal(result!.chainName, "free-selection"); + assert.equal(result!.chainName, "fresh-selection"); // 
Should pick an available model with different provider assert.notEqual(result!.model.provider, "unknown"); }); @@ -208,11 +207,7 @@ describe("FallbackResolver — findFallback", () => { it("skips providers with no model in registry", async () => { const { resolver } = createResolver({ - find: (provider: string, modelId: string) => { - if (provider === "alibaba") return undefined; - if (provider === "openai" && modelId === "gpt-4.1") return openaiModel; - return undefined; - }, + getAvailable: () => [openaiModel], }); const result = await resolver.findFallback(zaiModel, "quota_exhausted"); @@ -225,33 +220,9 @@ describe("FallbackResolver — findFallback", () => { // ─── checkForRestoration ───────────────────────────────────────────────────── describe("FallbackResolver — checkForRestoration", () => { - it("returns higher-priority provider when recovered", async () => { + it("returns null because restoration is disabled", async () => { const { resolver } = createResolver(); const result = await resolver.checkForRestoration(alibabaModel); - - assert.notEqual(result, null); - assert.equal(result!.model.provider, "zai"); - assert.equal(result!.model.id, "glm-5"); - }); - - it("returns null when already at highest priority", async () => { - const { resolver } = createResolver(); - const result = await resolver.checkForRestoration(zaiModel); - assert.equal(result, null); - }); - - it("returns null when higher-priority provider is still backed off", async () => { - const { resolver } = createResolver({ - isProviderAvailable: (provider: string) => provider !== "zai", - }); - - const result = await resolver.checkForRestoration(alibabaModel); - assert.equal(result, null); - }); - - it("returns null when fallback is disabled", async () => { - const { resolver } = createResolver({ enabled: false }); - const result = await resolver.checkForRestoration(alibabaModel); assert.equal(result, null); }); }); diff --git a/packages/pi-coding-agent/src/core/fallback-resolver.ts 
b/packages/pi-coding-agent/src/core/fallback-resolver.ts index ca52b11bd..7d3fb746b 100644 --- a/packages/pi-coding-agent/src/core/fallback-resolver.ts +++ b/packages/pi-coding-agent/src/core/fallback-resolver.ts @@ -2,11 +2,10 @@ // Copyright (c) 2026 Jeremy McSpadden /** - * FallbackResolver - Cross-provider fallback when rate/quota limits are hit. + * FallbackResolver - Fresh model reselection when rate/quota limits are hit. * - * When a provider's credentials are all exhausted, this resolver finds the next - * available provider+model from a user-configured fallback chain. It also handles - * restoration: checking if a higher-priority provider has recovered before each request. + * When a provider/model becomes unhealthy, this resolver picks a fresh model from + * the current available registry rather than walking a preconfigured fallback chain. */ import type { Api, Model } from "@singularity-forge/pi-ai"; @@ -31,20 +30,16 @@ export class FallbackResolver { ) {} /** - * Find the next available fallback for a model that just failed. - * Searches all chains for entries matching the current model's provider+id, - * then returns the next available entry with lower priority (higher number). + * Find a fresh replacement for a model that just failed. + * Ignores fallback chains and reselects from the current available registry. * - * If no chain contains the current model, falls through to free selection: - * picks any available model from the registry with a different provider. - * - * @returns FallbackResult if a fallback is available, null otherwise + * @returns FallbackResult if a replacement is available, null otherwise */ async findFallback( currentModel: Model, errorType: UsageLimitErrorType, ): Promise { - const { enabled, chains } = this.settingsManager.getFallbackSettings(); + const { enabled } = this.settingsManager.getFallbackSettings(); if (!enabled) return null; // Mark the current provider as exhausted at the provider level. 
@@ -55,75 +50,16 @@ export class FallbackResolver { this.authStorage.markProviderExhausted(currentModel.provider, errorType); } - // Search all chains for one containing the current model - for (const [chainName, entries] of Object.entries(chains)) { - const currentIndex = entries.findIndex( - (e) => - e.provider === currentModel.provider && e.model === currentModel.id, - ); - - if (currentIndex === -1) continue; - - // Try entries after the current one (already sorted by priority) - const result = await this._findAvailableInChain( - chainName, - entries, - currentIndex + 1, - ); - if (result) return result; - - // Wrap around: try entries before the current one - const wrapResult = await this._findAvailableInChain( - chainName, - entries, - 0, - currentIndex, - ); - if (wrapResult) return wrapResult; - } - - // No chain contained the current model — fall through to free selection - // from any available model in the registry with a different provider. return this._findAnyAvailableFallback(currentModel); } /** - * Check if a higher-priority provider in the chain has recovered. - * Called before each LLM request to restore the best available provider. - * - * @returns FallbackResult if a better provider is available, null if current is best + * Automatic restoration is disabled when replacement is always reselected + * from scratch instead of following a chain. 
*/ async checkForRestoration( - currentModel: Model, + _currentModel: Model, ): Promise { - const { enabled, chains } = this.settingsManager.getFallbackSettings(); - if (!enabled) return null; - - for (const [chainName, entries] of Object.entries(chains)) { - const currentIndex = entries.findIndex( - (e) => - e.provider === currentModel.provider && e.model === currentModel.id, - ); - - if (currentIndex === -1) continue; - - // Only check entries with higher priority (lower index = higher priority) - if (currentIndex === 0) continue; // Already at highest priority - - const result = await this._findAvailableInChain( - chainName, - entries, - 0, - currentIndex, - ); - if (result) { - return { - ...result, - reason: `${result.model.provider}/${result.model.id} recovered, restoring from fallback`, - }; - } - } - return null; } @@ -227,8 +163,8 @@ export class FallbackResolver { const chosen = candidates[0]; return { model: chosen, - chainName: "free-selection", - reason: `free fallback to ${chosen.provider}/${chosen.id} (no chain configured)`, + chainName: "fresh-selection", + reason: `reselected ${chosen.provider}/${chosen.id} from available models`, }; } } diff --git a/packages/pi-coding-agent/src/core/model-registry-env-fallback.test.ts b/packages/pi-coding-agent/src/core/model-registry-env-fallback.test.ts index eecff844c..b5db736ed 100644 --- a/packages/pi-coding-agent/src/core/model-registry-env-fallback.test.ts +++ b/packages/pi-coding-agent/src/core/model-registry-env-fallback.test.ts @@ -2,6 +2,7 @@ import assert from "node:assert/strict"; import { describe, it } from "vitest"; import type { AuthStorage } from "./auth-storage.js"; import { ModelRegistry } from "./model-registry.js"; +import { type Settings, SettingsManager } from "./settings-manager.js"; function createRegistryWithCapturedResolver() { let capturedResolver: ((provider: string) => string | undefined) | undefined; @@ -11,6 +12,7 @@ function createRegistryWithCapturedResolver() { ) => { 
capturedResolver = resolver; }, + setEnvAuthModeResolver: () => {}, onCredentialChange: () => {}, getOAuthProviders: () => [], get: () => undefined, @@ -26,6 +28,29 @@ function createRegistryWithCapturedResolver() { return capturedResolver!; } +function createRegistryWithSettingsAndCapturedResolver( + settings: Partial, +) { + let capturedResolver: ((provider: string) => string | undefined) | undefined; + const authStorage = { + setFallbackResolver: ( + resolver: (provider: string) => string | undefined, + ) => { + capturedResolver = resolver; + }, + setEnvAuthModeResolver: () => {}, + onCredentialChange: () => {}, + getOAuthProviders: () => [], + get: () => undefined, + hasAuth: () => false, + getApiKey: async () => undefined, + } as unknown as AuthStorage; + + new ModelRegistry(authStorage, undefined, SettingsManager.inMemory(settings)); + assert.ok(capturedResolver); + return capturedResolver!; +} + describe("ModelRegistry env fallback resolver (#3782)", () => { it("falls back to built-in provider env vars when models.json has no custom key", () => { const prev = process.env.MINIMAX_API_KEY; @@ -61,4 +86,38 @@ describe("ModelRegistry env fallback resolver (#3782)", () => { } } }); + + it("disables google env fallback by default", () => { + const prev = process.env.GEMINI_API_KEY; + process.env.GEMINI_API_KEY = "gemini-env-test-key"; + + try { + const resolver = createRegistryWithSettingsAndCapturedResolver({}); + assert.equal(resolver("google"), undefined); + } finally { + if (prev === undefined) { + delete process.env.GEMINI_API_KEY; + } else { + process.env.GEMINI_API_KEY = prev; + } + } + }); + + it("allows provider env fallback when providerEnvAuth is on", () => { + const prev = process.env.GEMINI_API_KEY; + process.env.GEMINI_API_KEY = "gemini-env-test-key"; + + try { + const resolver = createRegistryWithSettingsAndCapturedResolver({ + providerEnvAuth: { providers: { google: "on" } }, + }); + assert.equal(resolver("google"), "gemini-env-test-key"); + } 
finally { + if (prev === undefined) { + delete process.env.GEMINI_API_KEY; + } else { + process.env.GEMINI_API_KEY = prev; + } + } + }); }); diff --git a/packages/pi-coding-agent/src/core/model-registry.ts b/packages/pi-coding-agent/src/core/model-registry.ts index d2c142c5c..006f911d3 100644 --- a/packages/pi-coding-agent/src/core/model-registry.ts +++ b/packages/pi-coding-agent/src/core/model-registry.ts @@ -486,6 +486,19 @@ export class ModelRegistry { discoveryCache?: ModelDiscoveryCache, ) { this.discoveryCache = discoveryCache ?? new ModelDiscoveryCache(); + ( + this.authStorage as { + setEnvAuthModeResolver?: ( + resolver: (provider: string) => string, + ) => void; + } + ).setEnvAuthModeResolver?.( + (provider) => + this.settingsManager?.getProviderEnvAuthMode(provider) ?? + (provider === "google" || provider === "google-gemini-cli" + ? "off" + : "auto"), + ); // Set up fallback resolver for custom provider API keys this.authStorage.setFallbackResolver((provider) => { @@ -493,6 +506,14 @@ export class ModelRegistry { if (keyConfig) { return resolveConfigValue(keyConfig); } + if ( + (this.settingsManager?.getProviderEnvAuthMode(provider) ?? + (provider === "google" || provider === "google-gemini-cli" + ? "off" + : "auto")) === "off" + ) { + return undefined; + } return getEnvApiKey(provider); }); diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts index 64b98350b..7c19242e4 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.ts @@ -206,8 +206,8 @@ export class RetryHandler { } } - // Cross-provider fallback — for rate limits with all creds backed off, - // quota errors, or auth errors (invalid/expired key — no point retrying). + // Fresh model reselection — for rate limits, quota errors, or auth errors + // once the same-model retry budget has been meaningfully exercised. 
const isAuthError = errorType === "auth_error"; if (isRateLimit || isQuotaError || isAuthError) { // For quota errors with a retry-after hint, wait before switching providers. @@ -260,67 +260,39 @@ export class RetryHandler { return true; } } - - const fallbackResult = await this._deps.fallbackResolver.findFallback( - this._deps.getModel()!, - errorType, - ); - - if (fallbackResult) { - const previousProvider = this._deps.getModel()!.provider; - this._deps.agent.setModel(fallbackResult.model); - this._deps.onModelChange(fallbackResult.model); - this._removeLastAssistantError(); - - this._deps.emit({ - type: "fallback_provider_switch", - from: `${previousProvider}/${this._deps.getModel()?.id}`, - to: `${fallbackResult.model.provider}/${fallbackResult.model.id}`, - reason: fallbackResult.reason, - }); - - this._deps.emit({ - type: "auto_retry_start", - attempt: this._retryAttempt + 1, - maxAttempts: settings.maxRetries, - delayMs: 0, - errorMessage: `${message.errorMessage} (${fallbackResult.reason})`, - }); - - // Retry immediately with fallback provider - don't increment _retryAttempt - this._scheduleContinue(retryGeneration); - - return true; - } - - // No fallback available either - if (isQuotaError) { - // Try long-context model downgrade ([1m] → base) before giving up - const downgraded = this._tryLongContextDowngrade( + const provider = this._deps.getModel()!.provider; + const authMode = this._deps.modelRegistry.getProviderAuthMode(provider); + const shouldReselectImmediately = + isQuotaError || + isAuthError || + this._isCapacityError(message.errorMessage) || + (isRateLimit && authMode === "externalCli"); + if (shouldReselectImmediately) { + return this._tryFreshModelSelection( message, + errorType, retryGeneration, ); - if (downgraded) return true; - - this._deps.emit({ - type: "fallback_chain_exhausted", - reason: `All providers exhausted for ${this._deps.getModel()!.provider}/${this._deps.getModel()!.id}`, - }); - this._deps.emit({ - type: 
"auto_retry_end", - success: false, - attempt: this._retryAttempt, - finalError: message.errorMessage, - }); - this._retryAttempt = 0; - this._resolveRetry(); - return false; } } } this._retryAttempt++; + const errorType = message.errorMessage + ? this._classifyErrorType(message.errorMessage) + : "unknown"; + const isRateLimit = errorType === "rate_limit"; + const isQuotaError = errorType === "quota_exhausted"; + const isAuthError = errorType === "auth_error"; + const reselectionThreshold = Math.min(settings.maxRetries, 3); + if ( + (isRateLimit || isQuotaError || isAuthError) && + this._retryAttempt >= reselectionThreshold + ) { + return this._tryFreshModelSelection(message, errorType, retryGeneration); + } + if (this._retryAttempt > settings.maxRetries) { this._deps.emit({ type: "auto_retry_end", @@ -515,6 +487,72 @@ export class RetryHandler { return "unknown"; } + private _isCapacityError(errorMessage: string): boolean { + return /no capacity|capacity.*available|server.*busy|too busy/i.test( + errorMessage, + ); + } + + private async _tryFreshModelSelection( + message: AssistantMessage, + errorType: UsageLimitErrorType, + retryGeneration: number, + ): Promise { + const replacement = await this._deps.fallbackResolver.findFallback( + this._deps.getModel()!, + errorType, + ); + + if (replacement) { + const previousModel = this._deps.getModel()!; + this._deps.agent.setModel(replacement.model); + this._deps.onModelChange(replacement.model); + this._removeLastAssistantError(); + + this._deps.emit({ + type: "fallback_provider_switch", + from: `${previousModel.provider}/${previousModel.id}`, + to: `${replacement.model.provider}/${replacement.model.id}`, + reason: replacement.reason, + }); + + this._deps.emit({ + type: "auto_retry_start", + attempt: Math.max(this._retryAttempt, 1), + maxAttempts: this._deps.settingsManager.getRetrySettings().maxRetries, + delayMs: 0, + errorMessage: `${message.errorMessage} (${replacement.reason})`, + }); + + 
this._scheduleContinue(retryGeneration); + return true; + } + + if (errorType === "quota_exhausted") { + const downgraded = this._tryLongContextDowngrade( + message, + retryGeneration, + ); + if (downgraded) return true; + + this._deps.emit({ + type: "fallback_chain_exhausted", + reason: `No replacement model available for ${this._deps.getModel()!.provider}/${this._deps.getModel()!.id}`, + }); + this._deps.emit({ + type: "auto_retry_end", + success: false, + attempt: this._retryAttempt, + finalError: message.errorMessage, + }); + this._retryAttempt = 0; + this._resolveRetry(); + return false; + } + + return false; + } + /** * Attempt a same-model retry by reducing maxTokens when provider reports * an affordability cap (e.g., "can only afford 329"). diff --git a/packages/pi-coding-agent/src/core/settings-manager.ts b/packages/pi-coding-agent/src/core/settings-manager.ts index 1693ecebe..347e25934 100644 --- a/packages/pi-coding-agent/src/core/settings-manager.ts +++ b/packages/pi-coding-agent/src/core/settings-manager.ts @@ -1002,7 +1002,9 @@ export class SettingsManager { return ( this.settings.providerEnvAuth?.providers?.[provider] ?? this.settings.providerEnvAuth?.default ?? - "auto" + (provider === "google" || provider === "google-gemini-cli" + ? 
"off" + : "auto") ); } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts index 0de1f9b66..eedefb4fa 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts @@ -13,7 +13,7 @@ import type { ModelRegistry } from "../../../core/model-registry.js"; import type { SettingsManager } from "../../../core/settings-manager.js"; import { theme } from "../theme/theme.js"; import { DynamicBorder } from "./dynamic-border.js"; -import { keyHint } from "./keybinding-hints.js"; +import { keyHint, rawKeyHint } from "./keybinding-hints.js"; /** Display names for providers in the model selector UI. */ const PROVIDER_DISPLAY_NAMES: Record = { @@ -348,7 +348,7 @@ export class ModelSelectorComponent extends Container implements Focusable { return ( keyHint("tab", "scope") + theme.fg("muted", " (all/scoped) ") + - keyHint("d", "disable") + rawKeyHint("d", "disable") ); } diff --git a/src/resources/extensions/context7/index.js b/src/resources/extensions/context7/index.js index 5690fec1f..351056ad3 100644 --- a/src/resources/extensions/context7/index.js +++ b/src/resources/extensions/context7/index.js @@ -29,6 +29,7 @@ import { truncateHead, } from "@singularity-forge/pi-coding-agent"; import { Text } from "@singularity-forge/pi-tui"; +import { recordRetrievalEvidence } from "../sf/retrieval-evidence.js"; // ─── In-session cache ───────────────────────────────────────────────────────── // Keyed by lowercased query string @@ -133,9 +134,33 @@ export default function (pi) { ), }), async execute(_toolCallId, params, signal, _onUpdate, _ctx) { + const startedAt = Date.now(); + const projectRoot = process.cwd(); const cacheKey = params.libraryName.toLowerCase().trim(); if (searchCache.has(cacheKey)) { const cached = searchCache.get(cacheKey); + await 
recordRetrievalEvidence(projectRoot, { + backend: "context7", + sourceKind: "docs", + query: params.query + ? `${params.libraryName} ${params.query}` + : params.libraryName, + strategy: "library-search", + scope: params.libraryName, + freshness: "external-index", + status: "ok", + hitCount: cached.length, + elapsedMs: Date.now() - startedAt, + result: { + cached: true, + libraries: cached.map((lib) => ({ + id: lib.id, + title: lib.title, + trustScore: lib.trustScore, + benchmarkScore: lib.benchmarkScore, + })), + }, + }); return { content: [ { @@ -159,6 +184,20 @@ export default function (pi) { libs = Array.isArray(data?.results) ? data.results : []; } catch (err) { const msg = err instanceof Error ? err.message : String(err); + await recordRetrievalEvidence(projectRoot, { + backend: "context7", + sourceKind: "docs", + query: params.query + ? `${params.libraryName} ${params.query}` + : params.libraryName, + strategy: "library-search", + scope: params.libraryName, + freshness: "external-index", + status: "error", + hitCount: 0, + elapsedMs: Date.now() - startedAt, + error: msg, + }); return { content: [{ type: "text", text: `Context7 search failed: ${msg}` }], isError: true, @@ -171,6 +210,28 @@ export default function (pi) { }; } searchCache.set(cacheKey, libs); + await recordRetrievalEvidence(projectRoot, { + backend: "context7", + sourceKind: "docs", + query: params.query + ? 
`${params.libraryName} ${params.query}` + : params.libraryName, + strategy: "library-search", + scope: params.libraryName, + freshness: "external-index", + status: "ok", + hitCount: libs.length, + elapsedMs: Date.now() - startedAt, + result: { + cached: false, + libraries: libs.map((lib) => ({ + id: lib.id, + title: lib.title, + trustScore: lib.trustScore, + benchmarkScore: lib.benchmarkScore, + })), + }, + }); return { content: [ { type: "text", text: formatLibraryList(libs, params.libraryName) }, @@ -246,6 +307,8 @@ export default function (pi) { ), }), async execute(_toolCallId, params, signal, _onUpdate, _ctx) { + const startedAt = Date.now(); + const projectRoot = process.cwd(); const tokens = Math.min(Math.max(params.tokens ?? 5000, 500), 10000); // Strip accidental leading @ that some models inject const libraryId = params.libraryId.startsWith("@") @@ -255,6 +318,22 @@ export default function (pi) { const cacheKey = `${libraryId}::${query ?? ""}::${tokens}`; if (docCache.has(cacheKey)) { const cached = docCache.get(cacheKey); + await recordRetrievalEvidence(projectRoot, { + backend: "context7", + sourceKind: "docs", + query: query ?? "", + strategy: "docs-fetch", + scope: libraryId, + freshness: "external-index", + status: "ok", + hitCount: cached.trim() ? 1 : 0, + elapsedMs: Date.now() - startedAt, + result: { + cached: true, + tokens, + charCount: cached.length, + }, + }); return { content: [{ type: "text", text: cached }], details: { @@ -276,6 +355,19 @@ export default function (pi) { rawText = await apiFetchText(url.toString(), signal); } catch (err) { const msg = err instanceof Error ? err.message : String(err); + await recordRetrievalEvidence(projectRoot, { + backend: "context7", + sourceKind: "docs", + query: query ?? 
"", + strategy: "docs-fetch", + scope: libraryId, + freshness: "external-index", + status: "error", + hitCount: 0, + elapsedMs: Date.now() - startedAt, + error: msg, + result: { tokens }, + }); return { content: [ { type: "text", text: `Context7 doc fetch failed: ${msg}` }, @@ -296,6 +388,18 @@ export default function (pi) { const notFound = query ? `No documentation found for "${query}" in ${libraryId}. Try a broader query or different library ID.` : `No documentation found for ${libraryId}. Try resolve_library to verify the library ID.`; + await recordRetrievalEvidence(projectRoot, { + backend: "context7", + sourceKind: "docs", + query: query ?? "", + strategy: "docs-fetch", + scope: libraryId, + freshness: "external-index", + status: "empty", + hitCount: 0, + elapsedMs: Date.now() - startedAt, + result: { tokens }, + }); return { content: [{ type: "text", text: notFound }], details: { @@ -321,6 +425,23 @@ export default function (pi) { ` Use a more specific query to reduce output size.]`; } docCache.set(cacheKey, finalText); + await recordRetrievalEvidence(projectRoot, { + backend: "context7", + sourceKind: "docs", + query: query ?? 
"", + strategy: "docs-fetch", + scope: libraryId, + freshness: "external-index", + status: "ok", + hitCount: 1, + elapsedMs: Date.now() - startedAt, + result: { + cached: false, + tokens, + truncated: truncation.truncated, + charCount: finalText.length, + }, + }); return { content: [{ type: "text", text: finalText }], details: { diff --git a/src/resources/extensions/search-the-web/tool-search.js b/src/resources/extensions/search-the-web/tool-search.js index db736e85d..c68056fe3 100644 --- a/src/resources/extensions/search-the-web/tool-search.js +++ b/src/resources/extensions/search-the-web/tool-search.js @@ -18,6 +18,7 @@ import { truncateHead, } from "@singularity-forge/pi-coding-agent"; import { Text } from "@singularity-forge/pi-tui"; +import { recordRetrievalEvidence } from "../sf/retrieval-evidence.js"; import { LRUTTLCache } from "./cache.js"; import { formatSearchResults } from "./format.js"; import { classifyError, fetchWithRetry, fetchWithRetryTimed } from "./http.js"; @@ -58,6 +59,21 @@ export function resetSearchLoopGuardState() { consecutiveDupeCount = 0; sessionTotalSearches = 0; } +function summarizeWebResults(results) { + return results.map((result) => ({ + title: result.title, + url: result.url, + age: result.age, + })); +} +async function recordWebSearchEvidence(projectRoot, entry) { + await recordRetrievalEvidence(projectRoot, { + backend: "search-the-web", + sourceKind: "web", + freshness: "external-live", + ...entry, + }); +} // Summarizer responses: max 50 entries, 15-minute TTL const summarizerCache = new LRUTTLCache({ max: 50, ttlMs: 900_000 }); // ============================================================================= @@ -575,7 +591,18 @@ export function registerSearchTool(pi) { ), }), async execute(_toolCallId, params, signal, onUpdate, _ctx) { + const projectRoot = process.cwd(); + const startedAt = Date.now(); if (signal?.aborted) { + await recordWebSearchEvidence(projectRoot, { + query: params.query ?? 
"", + strategy: "aborted", + scope: params.domain ?? "", + status: "aborted", + hitCount: 0, + elapsedMs: Date.now() - startedAt, + error: "Search cancelled", + }); return { content: [{ type: "text", text: "Search cancelled." }], details: undefined, @@ -586,6 +613,15 @@ export function registerSearchTool(pi) { // ------------------------------------------------------------------ const provider = resolveSearchProvider(); if (!provider) { + await recordWebSearchEvidence(projectRoot, { + query: params.query, + strategy: "none", + scope: params.domain ?? "", + status: "error", + hitCount: 0, + elapsedMs: Date.now() - startedAt, + error: "No search API key set", + }); return { content: [ { @@ -604,6 +640,19 @@ export function registerSearchTool(pi) { // Session-level search budget // ------------------------------------------------------------------ if (sessionTotalSearches >= MAX_SEARCHES_PER_SESSION) { + await recordWebSearchEvidence(projectRoot, { + query: params.query, + strategy: provider, + scope: params.domain ?? "", + status: "budget_exhausted", + hitCount: 0, + elapsedMs: Date.now() - startedAt, + error: `Session search budget exhausted (${MAX_SEARCHES_PER_SESSION})`, + result: { + sessionTotalSearches, + maxSearches: MAX_SEARCHES_PER_SESSION, + }, + }); return { content: [ { @@ -661,6 +710,16 @@ export function registerSearchTool(pi) { if (cacheKey === lastSearchKey) { consecutiveDupeCount++; if (consecutiveDupeCount > MAX_CONSECUTIVE_DUPES) { + await recordWebSearchEvidence(projectRoot, { + query: params.query, + strategy: provider, + scope: params.domain ?? 
"", + status: "search_loop", + hitCount: 0, + elapsedMs: Date.now() - startedAt, + error: "Consecutive duplicate search detected", + result: { consecutiveDupeCount }, + }); return { content: [ { @@ -727,6 +786,21 @@ export function registerSearchTool(pi) { moreResultsAvailable: cached.moreResultsAvailable, provider, }; + await recordWebSearchEvidence(projectRoot, { + query: params.query, + strategy: provider, + scope: params.domain ?? "", + status: "ok", + hitCount: limited.length, + elapsedMs: Date.now() - startedAt, + result: { + cached: true, + effectiveQuery, + freshness: freshness || "none", + hasSummary: !!summaryText, + results: summarizeWebResults(limited), + }, + }); return { content: [{ type: "text", text: content }], details }; } onUpdate?.({ @@ -864,9 +938,38 @@ export function registerSearchTool(pi) { moreResultsAvailable: searchResult.moreResultsAvailable, provider, }; + await recordWebSearchEvidence(projectRoot, { + query: params.query, + strategy: provider, + scope: params.domain ?? "", + status: "ok", + hitCount: results.length, + elapsedMs: Date.now() - startedAt, + result: { + cached: false, + effectiveQuery, + freshness: freshness || "none", + hasSummary: !!summaryText, + latencyMs, + results: summarizeWebResults(results), + }, + }); return { content: [{ type: "text", text: content }], details }; } catch (error) { const classified = classifyError(error); + await recordWebSearchEvidence(projectRoot, { + query: params.query, + strategy: provider, + scope: params.domain ?? 
"", + status: "error", + hitCount: 0, + elapsedMs: Date.now() - startedAt, + error: classified.message, + result: { + errorKind: classified.kind, + retryAfterMs: classified.retryAfterMs, + }, + }); return { content: [ { type: "text", text: `Search failed: ${classified.message}` }, diff --git a/src/resources/extensions/sf/bootstrap/query-tools.js b/src/resources/extensions/sf/bootstrap/query-tools.js index e01b4f5d6..0e65e843b 100644 --- a/src/resources/extensions/sf/bootstrap/query-tools.js +++ b/src/resources/extensions/sf/bootstrap/query-tools.js @@ -1,5 +1,6 @@ // SF2 — Read-only query tools exposing DB state to the LLM via the WAL connection import { Type } from "@sinclair/typebox"; +import { getRetrievalEvidence } from "../sf-db.js"; import { executeMilestoneStatus } from "../tools/workflow-tool-executors.js"; import { ensureDbOpen } from "./dynamic-tools.js"; export function registerQueryTools(pi) { @@ -36,4 +37,59 @@ export function registerQueryTools(pi) { return executeMilestoneStatus(params); }, }); + pi.registerTool({ + name: "sf_retrieval_evidence", + label: "Retrieval Evidence", + description: + "Read recent retrieval provenance from the SF database. Returns source backend, query, scope, freshness, status, and result metadata. 
" + + "Use this instead of querying .sf/sf.db directly when auditing Sift, codebase_search, Context7, or web-derived context.", + promptSnippet: + "Inspect recent retrieval evidence rows with backend, scope, freshness, and hit counts", + promptGuidelines: [ + "Use this to verify whether context came from live source, stale indexed docs, or another retrieval backend before trusting it.", + "Prefer rows with backend=sift and freshness=working-tree/git-head for implementation decisions.", + ], + parameters: Type.Object({ + limit: Type.Optional( + Type.Number({ + description: "Maximum number of retrieval evidence rows to return.", + default: 20, + }), + ), + }), + async execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + const dbAvailable = await ensureDbOpen(); + if (!dbAvailable) { + return { + content: [ + { + type: "text", + text: "Error: SF database is not available. Cannot read retrieval evidence.", + }, + ], + details: { + operation: "retrieval_evidence", + error: "db_unavailable", + }, + }; + } + const limit = Math.max(1, Math.min(100, params.limit ?? 
20)); + const rows = getRetrievalEvidence(limit); + const lines = [`Retrieval evidence: ${rows.length} row(s)`, ""]; + for (const row of rows) { + lines.push( + `- #${row.id} ${row.backend}/${row.sourceKind} ${row.status} ` + + `freshness=${row.freshness} scope=${row.scope || "."} hits=${row.hitCount} ` + + `query="${row.query}"`, + ); + } + return { + content: [{ type: "text", text: lines.join("\n") }], + details: { + operation: "retrieval_evidence", + rows, + }, + }; + }, + }); } diff --git a/src/resources/extensions/sf/doctor-providers.js b/src/resources/extensions/sf/doctor-providers.js index 531de37db..a400bb132 100644 --- a/src/resources/extensions/sf/doctor-providers.js +++ b/src/resources/extensions/sf/doctor-providers.js @@ -11,10 +11,10 @@ * - Optional search/tool integrations (Brave, Tavily, Jina, Context7) */ import { existsSync } from "node:fs"; -import { getEnvApiKey } from "@singularity-forge/pi-ai"; import { AuthStorage } from "@singularity-forge/pi-coding-agent"; import { getAuthPath, PROVIDER_REGISTRY } from "./key-manager.js"; import { loadEffectiveSFPreferences } from "./preferences.js"; +import { getConfiguredEnvApiKey } from "./provider-env-auth.js"; import { couldBeVaultUri, hasProviderCredentialEnvVar, @@ -141,7 +141,7 @@ function resolveKey(providerId) { // Check environment variable using the authoritative env var resolution // (handles multi-var lookups like ANTHROPIC_OAUTH_TOKEN || ANTHROPIC_API_KEY, // COPILOT_GITHUB_TOKEN || GH_TOKEN || GITHUB_TOKEN, Vertex ADC, Bedrock, etc.) 
- if (getEnvApiKey(providerId)) { + if (getConfiguredEnvApiKey(providerId)) { return { found: true, source: "env", backedOff: false }; } // Check for vault:// URIs in env vars (late-binding resolution) @@ -278,6 +278,7 @@ function checkLlmProviders() { label, category: "llm", status: "ok", + source: lookup.source, message: `${label} — key present (${lookup.source})`, required: true, }); diff --git a/src/resources/extensions/sf/extension-manifest.json b/src/resources/extensions/sf/extension-manifest.json index f2e809f61..6f851b4cc 100644 --- a/src/resources/extensions/sf/extension-manifest.json +++ b/src/resources/extensions/sf/extension-manifest.json @@ -31,6 +31,7 @@ "sf_replan_slice", "sf_requirement_save", "sf_requirement_update", + "sf_retrieval_evidence", "sf_resume", "sf_save_gate_result", "sf_self_feedback_resolve", diff --git a/src/resources/extensions/sf/preferences-models.js b/src/resources/extensions/sf/preferences-models.js index ba3455d47..7e6b00b5b 100644 --- a/src/resources/extensions/sf/preferences-models.js +++ b/src/resources/extensions/sf/preferences-models.js @@ -8,11 +8,7 @@ import { existsSync, readFileSync, writeFileSync } from "node:fs"; import { homedir } from "node:os"; import { join } from "node:path"; -import { - getEnvApiKey, - getModels, - getProviders, -} from "@singularity-forge/pi-ai"; +import { getModels, getProviders } from "@singularity-forge/pi-ai"; import { DEFAULT_RUNAWAY_CHANGED_FILES_WARNING, DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS, @@ -26,6 +22,7 @@ import { getGlobalSFPreferencesPath, loadEffectiveSFPreferences, } from "./preferences.js"; +import { getConfiguredEnvApiKey } from "./provider-env-auth.js"; const OPENCODE_FREE_MODEL_IDS = new Set([ "big-pickle", @@ -35,7 +32,6 @@ const OPENCODE_FREE_MODEL_IDS = new Set([ ]); const HIDDEN_MODEL_PROVIDERS = new Set([ "claude-code", - "google", "google-vertex", "groq", "github-copilot", @@ -266,7 +262,7 @@ function resolveAutoBenchmarkPickForUnit(unitType, prefs) { const candidates = 
[]; for (const provider of getProviders()) { if (!isProviderAllowedByLists(provider, allowed, blocked)) continue; - if (!getEnvApiKey(provider)) continue; + if (!getConfiguredEnvApiKey(provider)) continue; for (const model of getModels(provider)) { if ( !isProviderModelAllowed( @@ -296,7 +292,12 @@ function resolveAutoBenchmarkPickForUnit(unitType, prefs) { }); if (!picked) return undefined; return { primary: picked.primary, fallbacks: picked.fallbacks }; - } catch { + } catch (err) { + if (process.env.SF_DEBUG_PREFERENCES_MODELS === "1") { + console.warn( + `preferences-models auto benchmark failed: ${err instanceof Error ? err.stack || err.message : String(err)}`, + ); + } return undefined; } } diff --git a/src/resources/extensions/sf/provider-env-auth.js b/src/resources/extensions/sf/provider-env-auth.js new file mode 100644 index 000000000..84dcfa752 --- /dev/null +++ b/src/resources/extensions/sf/provider-env-auth.js @@ -0,0 +1,73 @@ +import { existsSync, readFileSync } from "node:fs"; +import { join } from "node:path"; +import { getEnvApiKey } from "@singularity-forge/pi-ai"; +import { + getAgentDir, + SettingsManager, +} from "@singularity-forge/pi-coding-agent"; + +const GOOGLE_ENV_AUTH_DEFAULT_OFF_PROVIDERS = new Set([ + "google", + "google-gemini-cli", +]); + +function readJson(path) { + try { + if (!existsSync(path)) return {}; + return JSON.parse(readFileSync(path, "utf-8")); + } catch { + return {}; + } +} + +function readProviderEnvAuthSettings(cwd, agentDir) { + const globalSettings = readJson(join(agentDir, "settings.json")); + const projectSettings = readJson(join(cwd, ".sf", "settings.json")); + return { + ...(globalSettings.providerEnvAuth ?? {}), + ...(projectSettings.providerEnvAuth ?? {}), + providers: { + ...(globalSettings.providerEnvAuth?.providers ?? {}), + ...(projectSettings.providerEnvAuth?.providers ?? 
{}), + }, + }; +} + +function getProviderEnvAuthMode(providerId, cwd) { + const agentDir = getAgentDir(); + const settingsManager = SettingsManager.create(cwd, agentDir); + if (typeof settingsManager.getProviderEnvAuthMode === "function") { + return settingsManager.getProviderEnvAuthMode(providerId); + } + const settings = readProviderEnvAuthSettings(cwd, agentDir); + return ( + settings.providers?.[providerId] ?? + settings.default ?? + (GOOGLE_ENV_AUTH_DEFAULT_OFF_PROVIDERS.has(providerId) ? "off" : "auto") + ); +} + +function getProviderEnvKey(providerId) { + const apiKey = getEnvApiKey(providerId); + if (apiKey) return apiKey; + if (providerId === "google") { + return ( + process.env.GEMINI_API_KEY || process.env.GOOGLE_GENERATIVE_AI_API_KEY + ); + } + return undefined; +} + +/** + * Return the provider env API key only when Forge settings allow env auth. + * + * Purpose: keep SF extension-side provider heuristics aligned with the core + * providerEnvAuth policy so ambient env keys do not bypass settings.json. + * + * Consumer: doctor-providers.js and preferences-models.js when checking whether + * a provider is available from environment credentials. + */ +export function getConfiguredEnvApiKey(providerId, cwd = process.cwd()) { + if (getProviderEnvAuthMode(providerId, cwd) === "off") return undefined; + return getProviderEnvKey(providerId); +} diff --git a/src/resources/extensions/sf/retrieval-evidence.js b/src/resources/extensions/sf/retrieval-evidence.js new file mode 100644 index 000000000..6f5411667 --- /dev/null +++ b/src/resources/extensions/sf/retrieval-evidence.js @@ -0,0 +1,66 @@ +/** + * retrieval-evidence.js — DB-backed retrieval provenance helpers. + * + * Purpose: give local code, docs, and web retrieval tools one audit contract so + * agents can distinguish live source evidence from stale or external context. + * + * Consumer: `sift_search`, `codebase_search`, and the Context7/web search bridges.
+ */ +import { execFileSync } from "node:child_process"; + +function readGitValue(projectRoot, args) { + try { + return execFileSync("git", args, { + cwd: projectRoot, + encoding: "utf-8", + stdio: ["ignore", "pipe", "ignore"], + timeout: 2_000, + }).trim(); + } catch { + return ""; + } +} + +/** + * Build best-effort git provenance for a retrieval event. + * Purpose: label whether a context hit came from clean HEAD, a dirty worktree, + * or an unknown non-git directory before it is trusted for planning. + * Consumer: retrieval evidence writers. + */ +export function buildRetrievalProvenance(projectRoot) { + const gitHead = readGitValue(projectRoot, ["rev-parse", "HEAD"]) || null; + const gitBranch = + readGitValue(projectRoot, ["branch", "--show-current"]) || + readGitValue(projectRoot, ["rev-parse", "--abbrev-ref", "HEAD"]) || + null; + const status = readGitValue(projectRoot, ["status", "--porcelain"]); + return { + gitHead, + gitBranch, + worktreeDirty: status.length > 0, + freshness: gitHead + ? status.length > 0 + ? "working-tree" + : "git-head" + : "unknown", + }; +} + +/** + * Record retrieval evidence without making retrieval depend on DB availability. + * Purpose: preserve provenance when the SF DB is open while letting search tools + * degrade normally in standalone or early-startup contexts. + * Consumer: local retrieval tool implementations. + */ +export async function recordRetrievalEvidence(projectRoot, entry) { + try { + const { insertRetrievalEvidence } = await import("./sf-db.js"); + insertRetrievalEvidence({ + projectRoot, + ...buildRetrievalProvenance(projectRoot), + ...entry, + }); + } catch { + // Retrieval evidence is best-effort; search results must remain usable. 
+ } +} diff --git a/src/resources/extensions/sf/sf-db.js b/src/resources/extensions/sf/sf-db.js index 1077618f7..3cc3d5333 100644 --- a/src/resources/extensions/sf/sf-db.js +++ b/src/resources/extensions/sf/sf-db.js @@ -78,7 +78,7 @@ function openRawDb(path) { loadProvider(); return new DatabaseSync(path); } -const SCHEMA_VERSION = 40; +const SCHEMA_VERSION = 41; function indexExists(db, name) { return !!db .prepare( @@ -319,6 +319,39 @@ function ensureSelfFeedbackTables(db) { "CREATE INDEX IF NOT EXISTS idx_self_feedback_kind ON self_feedback(kind, ts)", ); } +function ensureRetrievalEvidenceTables(db) { + db.exec(` + CREATE TABLE IF NOT EXISTS retrieval_evidence ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + backend TEXT NOT NULL, + source_kind TEXT NOT NULL DEFAULT 'code', + query TEXT NOT NULL DEFAULT '', + strategy TEXT NOT NULL DEFAULT '', + scope TEXT NOT NULL DEFAULT '', + project_root TEXT NOT NULL DEFAULT '', + git_head TEXT DEFAULT NULL, + git_branch TEXT DEFAULT NULL, + worktree_dirty INTEGER NOT NULL DEFAULT 0, + freshness TEXT NOT NULL DEFAULT 'unknown', + status TEXT NOT NULL DEFAULT 'ok', + hit_count INTEGER NOT NULL DEFAULT 0, + elapsed_ms INTEGER NOT NULL DEFAULT 0, + cache_path TEXT DEFAULT NULL, + error TEXT DEFAULT NULL, + result_json TEXT NOT NULL DEFAULT '{}', + recorded_at TEXT NOT NULL + ) + `); + db.exec( + "CREATE INDEX IF NOT EXISTS idx_retrieval_evidence_backend_recorded ON retrieval_evidence(backend, recorded_at DESC)", + ); + db.exec( + "CREATE INDEX IF NOT EXISTS idx_retrieval_evidence_scope_recorded ON retrieval_evidence(scope, recorded_at DESC)", + ); + db.exec( + "CREATE INDEX IF NOT EXISTS idx_retrieval_evidence_status_recorded ON retrieval_evidence(status, recorded_at DESC)", + ); +} function ensureSpecSchemaTables(db) { // Tier 1.3: Spec/Runtime/Evidence schema separation // Creates 9 normalized tables for milestone, slice, task entities @@ -867,6 +900,7 @@ function initSchema(db, fileBacked) { `); ensureSelfFeedbackTables(db); 
ensureSolverEvalTables(db); + ensureRetrievalEvidenceTables(db); db.exec( "CREATE INDEX IF NOT EXISTS idx_memories_active ON memories(superseded_by)", ); @@ -940,6 +974,7 @@ function initSchema(db, fileBacked) { ensureHeadlessRunTables(db); ensureUokMessageTables(db); ensureSpecSchemaTables(db); + ensureRetrievalEvidenceTables(db); db.exec( `CREATE VIEW IF NOT EXISTS active_decisions AS SELECT * FROM decisions WHERE superseded_by IS NULL`, ); @@ -2106,6 +2141,15 @@ function migrateSchema(db) { ":applied_at": new Date().toISOString(), }); } + if (currentVersion < 41) { + ensureRetrievalEvidenceTables(db); + db.prepare( + "INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)", + ).run({ + ":version": 41, + ":applied_at": new Date().toISOString(), + }); + } db.exec("COMMIT"); } catch (err) { db.exec("ROLLBACK"); @@ -6208,6 +6252,73 @@ export function getJudgmentsForUnit(unitIdPrefix, limit = 1000) { return []; } } +// ─── Retrieval Evidence ───────────────────────────────────────────────────── + +/** + * Record a retrieval lookup with source provenance. + * Purpose: let SF compare live code, semantic, docs, and web context by the same + * freshness and scope contract before planning or implementation trusts it. + * Consumer: Sift/codebase search tools and the Context7/web retrieval bridges. + */ +export function insertRetrievalEvidence(args) { + if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open"); + const now = args.recordedAt ?? 
new Date().toISOString(); + currentDb + .prepare(`INSERT INTO retrieval_evidence ( + backend, source_kind, query, strategy, scope, project_root, + git_head, git_branch, worktree_dirty, freshness, status, + hit_count, elapsed_ms, cache_path, error, result_json, recorded_at + ) VALUES ( + :backend, :source_kind, :query, :strategy, :scope, :project_root, + :git_head, :git_branch, :worktree_dirty, :freshness, :status, + :hit_count, :elapsed_ms, :cache_path, :error, :result_json, :recorded_at + )`) + .run({ + ":backend": args.backend, + ":source_kind": args.sourceKind ?? "code", + ":query": args.query ?? "", + ":strategy": args.strategy ?? "", + ":scope": args.scope ?? "", + ":project_root": args.projectRoot ?? "", + ":git_head": args.gitHead ?? null, + ":git_branch": args.gitBranch ?? null, + ":worktree_dirty": intBool(args.worktreeDirty), + ":freshness": args.freshness ?? "unknown", + ":status": args.status ?? "ok", + ":hit_count": args.hitCount ?? 0, + ":elapsed_ms": args.elapsedMs ?? 0, + ":cache_path": args.cachePath ?? null, + ":error": args.error ?? null, + ":result_json": JSON.stringify(args.result ?? {}), + ":recorded_at": now, + }); +} + +/** + * Return recent retrieval evidence rows. + * Purpose: support audits that need to distinguish live source evidence from + * stale indexed or prose-only context. + * Consumer: inspect/doctor tooling and tests for retrieval provenance. 
+ */ +export function getRetrievalEvidence(limit = 100) { + if (!currentDb) return []; + const rows = currentDb + .prepare(`SELECT + id, backend, source_kind AS sourceKind, query, strategy, scope, + project_root AS projectRoot, git_head AS gitHead, + git_branch AS gitBranch, worktree_dirty AS worktreeDirty, + freshness, status, hit_count AS hitCount, elapsed_ms AS elapsedMs, + cache_path AS cachePath, error, result_json AS resultJson, recorded_at AS recordedAt + FROM retrieval_evidence + ORDER BY recorded_at DESC, id DESC + LIMIT :limit`) + .all({ ":limit": limit }); + return rows.map((row) => ({ + ...row, + worktreeDirty: row.worktreeDirty === 1, + result: parseJsonObject(row.resultJson, {}), + })); +} // ─── Memory Embeddings ─────────────────────────────────────────────────────── export function upsertMemoryEmbedding(args) { if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open"); diff --git a/src/resources/extensions/sf/tests/context7-retrieval-evidence.test.mjs b/src/resources/extensions/sf/tests/context7-retrieval-evidence.test.mjs new file mode 100644 index 000000000..d0c4425e2 --- /dev/null +++ b/src/resources/extensions/sf/tests/context7-retrieval-evidence.test.mjs @@ -0,0 +1,124 @@ +/** + * context7-retrieval-evidence.test.mjs — Context7 provenance coverage. + * + * Purpose: prove external documentation lookups write DB evidence with backend, + * scope, freshness, and result metadata before agents trust indexed docs. 
+ */ +import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, test } from "vitest"; +import registerContext7Extension from "../../context7/index.js"; +import { closeDatabase, getRetrievalEvidence, openDatabase } from "../sf-db.js"; + +const tmpRoots = []; +const originalCwd = process.cwd(); +const originalFetch = globalThis.fetch; + +afterEach(() => { + process.chdir(originalCwd); + closeDatabase(); + globalThis.fetch = originalFetch; + for (const root of tmpRoots.splice(0)) { + rmSync(root, { recursive: true, force: true }); + } +}); + +function makeProject() { + const root = mkdtempSync(join(tmpdir(), "sf-context7-evidence-")); + tmpRoots.push(root); + mkdirSync(join(root, ".sf"), { recursive: true }); + return root; +} + +function captureContext7Tools() { + const tools = new Map(); + registerContext7Extension({ + on() {}, + registerTool(definition) { + tools.set(definition.name, definition); + }, + }); + return tools; +} + +test("resolve_library_when_successful_records_retrieval_evidence", async () => { + const project = makeProject(); + process.chdir(project); + assert.equal(openDatabase(join(project, ".sf", "sf.db")), true); + globalThis.fetch = async (url) => { + assert.match(String(url), /\/libs\/search/); + return { + ok: true, + json: async () => ({ + results: [ + { + id: "/websites/example_docs", + title: "Example Docs", + trustScore: 9, + benchmarkScore: 87.5, + }, + ], + }), + }; + }; + + const tools = captureContext7Tools(); + const result = await tools.get("resolve_library").execute("call-1", { + libraryName: "example-docs-evidence", + query: "routing", + }); + + assert.equal(result.details.resultCount, 1); + const rows = getRetrievalEvidence(5); + assert.equal(rows.length, 1); + assert.equal(rows[0].backend, "context7"); + assert.equal(rows[0].sourceKind, "docs"); + assert.equal(rows[0].strategy, "library-search"); + 
assert.equal(rows[0].scope, "example-docs-evidence"); + assert.equal(rows[0].freshness, "external-index"); + assert.equal(rows[0].status, "ok"); + assert.equal(rows[0].hitCount, 1); + assert.deepEqual(rows[0].result.libraries, [ + { + id: "/websites/example_docs", + title: "Example Docs", + trustScore: 9, + benchmarkScore: 87.5, + }, + ]); +}); + +test("get_library_docs_when_successful_records_retrieval_evidence", async () => { + const project = makeProject(); + process.chdir(project); + assert.equal(openDatabase(join(project, ".sf", "sf.db")), true); + globalThis.fetch = async (url) => { + assert.match(String(url), /\/context/); + return { + ok: true, + text: async () => "Example docs about routing.", + }; + }; + + const tools = captureContext7Tools(); + const result = await tools.get("get_library_docs").execute("call-1", { + libraryId: "/websites/example_docs", + query: "routing", + tokens: 500, + }); + + assert.equal(result.details.charCount, "Example docs about routing.".length); + const rows = getRetrievalEvidence(5); + assert.equal(rows.length, 1); + assert.equal(rows[0].backend, "context7"); + assert.equal(rows[0].sourceKind, "docs"); + assert.equal(rows[0].strategy, "docs-fetch"); + assert.equal(rows[0].scope, "/websites/example_docs"); + assert.equal(rows[0].freshness, "external-index"); + assert.equal(rows[0].status, "ok"); + assert.equal(rows[0].hitCount, 1); + assert.equal(rows[0].result.tokens, 500); + assert.equal(rows[0].result.charCount, "Example docs about routing.".length); +}); diff --git a/src/resources/extensions/sf/tests/doctor-providers.test.mjs b/src/resources/extensions/sf/tests/doctor-providers.test.mjs index 289ac4229..1c45cb25b 100644 --- a/src/resources/extensions/sf/tests/doctor-providers.test.mjs +++ b/src/resources/extensions/sf/tests/doctor-providers.test.mjs @@ -24,6 +24,7 @@ function makePreferencesProject(globalPreferences) { const home = join(root, "home"); const project = join(root, "project"); mkdirSync(home, { recursive: true 
}); + mkdirSync(join(home, ".sf", "agent"), { recursive: true }); mkdirSync(join(project, ".sf"), { recursive: true }); writeFileSync(join(home, "preferences.md"), globalPreferences, "utf-8"); writeFileSync( @@ -32,6 +33,7 @@ function makePreferencesProject(globalPreferences) { "utf-8", ); process.env.SF_HOME = home; + process.env.HOME = home; process.chdir(project); return project; } @@ -91,4 +93,59 @@ describe("doctor provider checks", () => { assert.equal(telegram?.status, "unconfigured"); assert.equal(telegram?.required, false); }); + + test("runProviderChecks_when_google_env_auth_is_default_off_treats_google_as_missing_required_route", () => { + makePreferencesProject( + [ + "---", + "version: 1", + "models:", + " planning: google/gemini-2.5-pro", + "---", + "", + ].join("\n"), + ); + process.env.GEMINI_API_KEY = "test-google-key"; + + const results = runProviderChecks(); + const google = results.find((result) => result.name === "google"); + + assert.equal(google?.status, "error"); + }); + + test("runProviderChecks_when_google_env_auth_is_enabled_accepts_google_env_key", () => { + const project = makePreferencesProject( + [ + "---", + "version: 1", + "models:", + " planning: google/gemini-2.5-pro", + "---", + "", + ].join("\n"), + ); + mkdirSync(join(project, ".sf"), { recursive: true }); + writeFileSync( + join(project, ".sf", "settings.json"), + JSON.stringify( + { + providerEnvAuth: { + providers: { + google: "on", + }, + }, + }, + null, + 2, + ), + "utf-8", + ); + process.env.GEMINI_API_KEY = "test-google-key"; + + const results = runProviderChecks(); + const google = results.find((result) => result.name === "google"); + + assert.equal(google?.status, "ok"); + assert.equal(google?.source, "env"); + }); }); diff --git a/src/resources/extensions/sf/tests/preferences-models.test.mjs b/src/resources/extensions/sf/tests/preferences-models.test.mjs new file mode 100644 index 000000000..335ee2178 --- /dev/null +++ 
b/src/resources/extensions/sf/tests/preferences-models.test.mjs @@ -0,0 +1,93 @@ +import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, test } from "vitest"; +import { resolveModelWithFallbacksForUnit } from "../preferences-models.js"; +import { getConfiguredEnvApiKey } from "../provider-env-auth.js"; + +const originalCwd = process.cwd(); +const originalEnv = { ...process.env }; +const tmpDirs = []; + +afterEach(() => { + process.chdir(originalCwd); + process.env = { ...originalEnv }; + while (tmpDirs.length > 0) { + rmSync(tmpDirs.pop(), { recursive: true, force: true }); + } +}); + +function makePreferencesProject(projectPreferences, projectSettings) { + const root = mkdtempSync(join(tmpdir(), "sf-preferences-models-")); + tmpDirs.push(root); + const home = join(root, "home"); + const project = join(root, "project"); + mkdirSync(join(home, ".sf", "agent"), { recursive: true }); + mkdirSync(join(project, ".sf"), { recursive: true }); + writeFileSync( + join(project, ".sf", "PREFERENCES.md"), + projectPreferences, + "utf-8", + ); + if (projectSettings) { + writeFileSync( + join(project, ".sf", "settings.json"), + JSON.stringify(projectSettings, null, 2), + "utf-8", + ); + } + process.env.HOME = home; + process.chdir(project); + return project; +} + +describe("preferences model resolution", () => { + test("resolveModelWithFallbacksForUnit_when_google_env_auth_is_default_off_skips_google_auto_benchmark_candidates", () => { + makePreferencesProject( + [ + "---", + "version: 1", + "allowed_providers:", + " - google", + "models: {}", + "---", + "", + ].join("\n"), + ); + process.env.GEMINI_API_KEY = "test-google-key"; + + const result = resolveModelWithFallbacksForUnit("plan-milestone"); + + assert.equal(result, undefined); + }); + + 
test("resolveModelWithFallbacksForUnit_when_google_env_auth_is_enabled_uses_google_auto_benchmark_candidates", () => { + makePreferencesProject( + [ + "---", + "version: 1", + "allowed_providers:", + " - google", + "models: {}", + "---", + "", + ].join("\n"), + { + providerEnvAuth: { + providers: { + google: "on", + }, + }, + }, + ); + process.env.GEMINI_API_KEY = "test-google-key"; + + assert.equal(getConfiguredEnvApiKey("google"), "test-google-key"); + const result = resolveModelWithFallbacksForUnit("plan-milestone"); + + assert.ok(result); + assert.equal(typeof result.primary, "string"); + assert.ok(result.primary.length > 0); + }); +}); diff --git a/src/resources/extensions/sf/tests/sf-db-migration.test.mjs b/src/resources/extensions/sf/tests/sf-db-migration.test.mjs index 25cffd58e..0bb897486 100644 --- a/src/resources/extensions/sf/tests/sf-db-migration.test.mjs +++ b/src/resources/extensions/sf/tests/sf-db-migration.test.mjs @@ -14,9 +14,11 @@ import { closeDatabase, getDatabase, getJudgmentsForUnit, + getRetrievalEvidence, getScheduleEntries, insertGateRun, insertJudgment, + insertRetrievalEvidence, insertScheduleEntry, openDatabase, } from "../sf-db.js"; @@ -203,7 +205,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill", const version = db .prepare("SELECT MAX(version) AS version FROM schema_version") .get(); - assert.equal(version.version, 40); + assert.equal(version.version, 41); const taskSpec = db .prepare( "SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'", @@ -343,3 +345,37 @@ test("openDatabase_judgments_table_round_trip", () => { assert.equal(t01.length, 1); assert.equal(t01[0].confidence, "high"); }); + +test("openDatabase_retrieval_evidence_table_round_trip", () => { + assert.equal(openDatabase(":memory:"), true); + insertRetrievalEvidence({ + backend: "sift", + sourceKind: "code", + query: "approval policy", + strategy: "bm25", + scope: "src", + projectRoot: "/repo", + gitHead: 
"abc123", + gitBranch: "main", + worktreeDirty: true, + freshness: "working-tree", + status: "ok", + hitCount: 1, + elapsedMs: 42, + cachePath: "/repo/.sf/runtime/sift/search-cache", + result: { hits: [{ path: "src/index.ts", score: 0.9 }] }, + recordedAt: "2026-05-07T00:00:00.000Z", + }); + + const rows = getRetrievalEvidence(10); + assert.equal(rows.length, 1); + assert.equal(rows[0].backend, "sift"); + assert.equal(rows[0].sourceKind, "code"); + assert.equal(rows[0].query, "approval policy"); + assert.equal(rows[0].scope, "src"); + assert.equal(rows[0].worktreeDirty, true); + assert.equal(rows[0].freshness, "working-tree"); + assert.deepEqual(rows[0].result, { + hits: [{ path: "src/index.ts", score: 0.9 }], + }); +}); diff --git a/src/resources/extensions/sf/tests/sift-retrieval-evidence.test.mjs b/src/resources/extensions/sf/tests/sift-retrieval-evidence.test.mjs new file mode 100644 index 000000000..ec4afb527 --- /dev/null +++ b/src/resources/extensions/sf/tests/sift-retrieval-evidence.test.mjs @@ -0,0 +1,168 @@ +/** + * sift-retrieval-evidence.test.mjs — Sift retrieval provenance coverage. + * + * Purpose: prove live code searches record DB evidence with backend, scope, + * freshness, and result metadata so context tools can be audited consistently. 
+ */ +import assert from "node:assert/strict"; +import { + chmodSync, + mkdirSync, + mkdtempSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, test } from "vitest"; +import registerSubagentExtension from "../../subagent/index.js"; +import { registerQueryTools } from "../bootstrap/query-tools.js"; +import { closeDatabase, getRetrievalEvidence, openDatabase } from "../sf-db.js"; +import { registerSiftSearchTool } from "../tools/sift-search-tool.js"; + +const tmpRoots = []; +const originalCwd = process.cwd(); +const originalSiftPath = process.env.SIFT_PATH; + +afterEach(() => { + process.chdir(originalCwd); + closeDatabase(); + if (originalSiftPath === undefined) { + delete process.env.SIFT_PATH; + } else { + process.env.SIFT_PATH = originalSiftPath; + } + for (const root of tmpRoots.splice(0)) { + rmSync(root, { recursive: true, force: true }); + } +}); + +function makeProject() { + const root = mkdtempSync(join(tmpdir(), "sf-sift-evidence-")); + tmpRoots.push(root); + mkdirSync(join(root, ".sf"), { recursive: true }); + mkdirSync(join(root, "src"), { recursive: true }); + writeFileSync(join(root, "src", "index.js"), "export const value = 1;\n"); + return root; +} + +function makeFakeSift() { + const dir = mkdtempSync(join(tmpdir(), "sf-sift-bin-")); + tmpRoots.push(dir); + const bin = join(dir, "sift"); + writeFileSync( + bin, + `#!/bin/sh +printf '{"query":"approval policy","strategy":"bm25","hits":[{"path":"src/index.js","score":0.91,"content":"approval policy contract","line_start":1,"line_end":1}]}'`, + ); + chmodSync(bin, 0o755); + return bin; +} + +function captureTool() { + let tool = null; + registerSiftSearchTool({ + registerTool(definition) { + tool = definition; + }, + }); + assert.ok(tool, "sift_search tool should register"); + return tool; +} + +function captureQueryTool(name) { + const tools = new Map(); + registerQueryTools({ + registerTool(definition) { + 
tools.set(definition.name, definition); + }, + }); + const tool = tools.get(name); + assert.ok(tool, `${name} tool should register`); + return tool; +} + +function captureSubagentTool(name) { + const tools = new Map(); + registerSubagentExtension({ + on() {}, + registerCommand() {}, + registerTool(definition) { + tools.set(definition.name, definition); + }, + }); + const tool = tools.get(name); + assert.ok(tool, `${name} tool should register`); + return tool; +} + +test("sift_search_when_successful_records_retrieval_evidence", async () => { + const project = makeProject(); + process.chdir(project); + process.env.SIFT_PATH = makeFakeSift(); + assert.equal(openDatabase(join(project, ".sf", "sf.db")), true); + + const tool = captureTool(); + const result = await tool.execute("call-1", { + query: "approval policy", + path: "src", + strategy: "bm25", + limit: 5, + }); + + assert.equal(result.isError, undefined); + assert.equal(result.details.hitCount, 1); + const rows = getRetrievalEvidence(5); + assert.equal(rows.length, 1); + assert.equal(rows[0].backend, "sift"); + assert.equal(rows[0].sourceKind, "code"); + assert.equal(rows[0].query, "approval policy"); + assert.equal(rows[0].strategy, "bm25"); + assert.equal(rows[0].scope, "src"); + assert.equal(rows[0].status, "ok"); + assert.equal(rows[0].hitCount, 1); + assert.equal(rows[0].projectRoot, project); + assert.match(rows[0].cachePath, /\.sf\/runtime\/sift\/search-cache$/); + assert.deepEqual(rows[0].result.hits, [ + { + path: "src/index.js", + score: 0.91, + lineStart: 1, + lineEnd: 1, + }, + ]); + + const queryTool = captureQueryTool("sf_retrieval_evidence"); + const queryResult = await queryTool.execute("call-2", { limit: 1 }); + assert.match(queryResult.content[0].text, /Retrieval evidence: 1 row/); + assert.equal(queryResult.details.rows[0].backend, "sift"); + assert.equal(queryResult.details.rows[0].query, "approval policy"); +}); + +test("codebase_search_when_successful_records_retrieval_evidence", async () => 
{ + const project = makeProject(); + process.chdir(project); + process.env.SIFT_PATH = makeFakeSift(); + assert.equal(openDatabase(join(project, ".sf", "sf.db")), true); + + const tool = captureSubagentTool("codebase_search"); + const result = await tool.execute("call-1", { + query: "approval policy", + scope: "src", + strategy: "path-hybrid", + timeoutMs: 10_000, + }); + + assert.equal(result.details.operation, "codebase_search"); + assert.equal(result.details.exitCode, 0); + const rows = getRetrievalEvidence(5); + assert.equal(rows.length, 1); + assert.equal(rows[0].backend, "codebase_search"); + assert.equal(rows[0].sourceKind, "code"); + assert.equal(rows[0].query, "approval policy"); + assert.equal(rows[0].strategy, "path-hybrid"); + assert.equal(rows[0].scope, "src"); + assert.equal(rows[0].status, "ok"); + assert.equal(rows[0].hitCount, 1); + assert.match(rows[0].result.outputPreview, /approval policy contract/); +}); diff --git a/src/resources/extensions/sf/tests/web-search-retrieval-evidence.test.mjs b/src/resources/extensions/sf/tests/web-search-retrieval-evidence.test.mjs new file mode 100644 index 000000000..fbe91626d --- /dev/null +++ b/src/resources/extensions/sf/tests/web-search-retrieval-evidence.test.mjs @@ -0,0 +1,136 @@ +/** + * web-search-retrieval-evidence.test.mjs — web search provenance coverage. + * + * Purpose: prove current/external fact lookups write DB evidence before agents + * use web results as planning input. 
+ */ +import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, test } from "vitest"; +import { + registerSearchTool, + resetSearchLoopGuardState, +} from "../../search-the-web/tool-search.js"; +import { closeDatabase, getRetrievalEvidence, openDatabase } from "../sf-db.js"; + +const tmpRoots = []; +const originalCwd = process.cwd(); +const originalFetch = globalThis.fetch; +const originalBraveKey = process.env.BRAVE_API_KEY; +const originalTavilyKey = process.env.TAVILY_API_KEY; +const originalMiniMaxCodePlanKey = process.env.MINIMAX_CODE_PLAN_KEY; +const originalMiniMaxCodingKey = process.env.MINIMAX_CODING_API_KEY; +const originalMiniMaxKey = process.env.MINIMAX_API_KEY; +const originalSerperKey = process.env.SERPER_API_KEY; +const originalExaKey = process.env.EXA_API_KEY; +const originalOllamaKey = process.env.OLLAMA_API_KEY; + +afterEach(() => { + process.chdir(originalCwd); + closeDatabase(); + globalThis.fetch = originalFetch; + if (originalBraveKey === undefined) { + delete process.env.BRAVE_API_KEY; + } else { + process.env.BRAVE_API_KEY = originalBraveKey; + } + restoreEnv("TAVILY_API_KEY", originalTavilyKey); + restoreEnv("MINIMAX_CODE_PLAN_KEY", originalMiniMaxCodePlanKey); + restoreEnv("MINIMAX_CODING_API_KEY", originalMiniMaxCodingKey); + restoreEnv("MINIMAX_API_KEY", originalMiniMaxKey); + restoreEnv("SERPER_API_KEY", originalSerperKey); + restoreEnv("EXA_API_KEY", originalExaKey); + restoreEnv("OLLAMA_API_KEY", originalOllamaKey); + resetSearchLoopGuardState(); + for (const root of tmpRoots.splice(0)) { + rmSync(root, { recursive: true, force: true }); + } +}); + +function restoreEnv(key, value) { + if (value === undefined) { + delete process.env[key]; + } else { + process.env[key] = value; + } +} + +function makeProject() { + const root = mkdtempSync(join(tmpdir(), "sf-web-search-evidence-")); + tmpRoots.push(root); + 
mkdirSync(join(root, ".sf"), { recursive: true }); + return root; +} + +function captureSearchTool() { + let tool = null; + registerSearchTool({ + registerTool(definition) { + tool = definition; + }, + writeTempFile: async () => "/tmp/not-used", + }); + assert.ok(tool, "search-the-web tool should register"); + return tool; +} + +test("search_the_web_when_successful_records_retrieval_evidence", async () => { + const project = makeProject(); + process.chdir(project); + process.env.BRAVE_API_KEY = "test-brave-key"; + process.env.TAVILY_API_KEY = ""; + process.env.MINIMAX_CODE_PLAN_KEY = ""; + process.env.MINIMAX_CODING_API_KEY = ""; + process.env.MINIMAX_API_KEY = ""; + process.env.SERPER_API_KEY = ""; + process.env.EXA_API_KEY = ""; + process.env.OLLAMA_API_KEY = ""; + assert.equal(openDatabase(join(project, ".sf", "sf.db")), true); + globalThis.fetch = async (url) => { + assert.match(String(url), /api\.search\.brave\.com/); + return { + ok: true, + headers: { get: () => null }, + json: async () => ({ + web: { + results: [ + { + title: "Example Result", + url: "https://example.com/result", + description: "A result for testing.", + }, + ], + }, + query: { + original: "example query", + altered: "example query", + }, + }), + }; + }; + + const tool = captureSearchTool(); + const result = await tool.execute("call-1", { + query: "example query", + count: 1, + }); + + assert.equal(result.details.provider, "brave"); + assert.equal(result.details.count, 1); + const rows = getRetrievalEvidence(5); + assert.equal(rows.length, 1); + assert.equal(rows[0].backend, "search-the-web"); + assert.equal(rows[0].sourceKind, "web"); + assert.equal(rows[0].strategy, "brave"); + assert.equal(rows[0].freshness, "external-live"); + assert.equal(rows[0].status, "ok"); + assert.equal(rows[0].hitCount, 1); + assert.deepEqual(rows[0].result.results, [ + { + title: "Example Result", + url: "https://example.com/result", + }, + ]); +}); diff --git 
a/src/resources/extensions/sf/tools/sift-search-tool.js b/src/resources/extensions/sf/tools/sift-search-tool.js index 9d0082757..315d6c2e7 100644 --- a/src/resources/extensions/sf/tools/sift-search-tool.js +++ b/src/resources/extensions/sf/tools/sift-search-tool.js @@ -17,6 +17,7 @@ import { resolveSiftBinary, resolveSiftSearchScope, } from "../code-intelligence.js"; +import { recordRetrievalEvidence } from "../retrieval-evidence.js"; const _KNOWN_STRATEGIES = [ "hybrid", @@ -263,20 +264,45 @@ export function registerSiftSearchTool(pi) { const elapsedMs = Date.now() - startedAt; const result = parseSiftOutput(stdout, stderr); const runtimeDirs = ensureSiftRuntimeDirs(projectRoot); - - // Telemetry: log query outcomes for tuning - const { logInfo } = await import("../workflow-logger.js"); - logInfo("sift_search", { + await recordRetrievalEvidence(projectRoot, { + backend: "sift", + sourceKind: "code", query: params.query, strategy: params.strategy ?? DEFAULT_STRATEGY, - agent: params.agent ?? false, - path: scope, + scope, + status: "ok", hitCount: result.hits.length, elapsedMs, - binary: binaryPath, - searchCache: runtimeDirs.searchCache, + cachePath: runtimeDirs.searchCache, + result: { + hits: result.hits.map((hit) => ({ + path: hit.path, + score: hit.score, + lineStart: hit.lineStart, + lineEnd: hit.lineEnd, + })), + agent: params.agent ?? false, + agentMode: params.agentMode ?? null, + plannerStrategy: params.plannerStrategy ?? null, + }, }); + try { + const { debugLog } = await import("../debug-logger.js"); + debugLog("sift_search", { + query: params.query, + strategy: params.strategy ?? DEFAULT_STRATEGY, + agent: params.agent ?? false, + path: scope, + hitCount: result.hits.length, + elapsedMs, + binary: binaryPath, + searchCache: runtimeDirs.searchCache, + }); + } catch { + // Telemetry must not change search semantics. + } + const lines = [ `Sift search: "${params.query}"`, `Strategy: ${params.strategy ?? DEFAULT_STRATEGY}${params.agent ? 
` | agent: ${params.agentMode ?? "linear"} | planner: ${params.plannerStrategy ?? "heuristic"}` : ""}`, @@ -319,6 +345,17 @@ export function registerSiftSearchTool(pi) { } catch (err) { const elapsedMs = Date.now() - startedAt; const message = err instanceof Error ? err.message : String(err); + await recordRetrievalEvidence(projectRoot, { + backend: "sift", + sourceKind: "code", + query: params.query, + strategy: params.strategy ?? DEFAULT_STRATEGY, + scope, + status: "error", + hitCount: 0, + elapsedMs, + error: message, + }); return { content: [ { diff --git a/src/resources/extensions/subagent/index.js b/src/resources/extensions/subagent/index.js index 06caba7f6..0615cc524 100644 --- a/src/resources/extensions/subagent/index.js +++ b/src/resources/extensions/subagent/index.js @@ -29,6 +29,7 @@ import { resolveSiftSearchScope, } from "../sf/code-intelligence.js"; import { loadEffectiveSFPreferences } from "../sf/preferences.js"; +import { recordRetrievalEvidence } from "../sf/retrieval-evidence.js"; import { formatTokenCount } from "../shared/mod.js"; import { getCurrentPhase } from "../shared/sf-phase-state.js"; import { discoverAgents } from "./agents.js"; @@ -2281,6 +2282,7 @@ export default function (pi) { const scope = resolveSiftSearchScope(projectRoot, params.scope); const strategy = params.strategy ?? "page-index-hybrid"; const query = params.query; + const startedAt = Date.now(); const timeoutMs = typeof params.timeoutMs === "number" && Number.isFinite(params.timeoutMs) @@ -2288,6 +2290,17 @@ export default function (pi) { : CODEBASE_SEARCH_TIMEOUT_MS; const siftBin = resolveSiftBinary(); if (!siftBin) { + await recordRetrievalEvidence(projectRoot, { + backend: "codebase_search", + sourceKind: "code", + query, + strategy, + scope, + status: "error", + hitCount: 0, + elapsedMs: Date.now() - startedAt, + error: "sift binary not found", + }); return { content: [ { @@ -2367,6 +2380,22 @@ export default function (pi) { const text = timedOut ? 
`Code search timed out after ${Math.round(timeoutMs / 1000)}s. Narrow the query or scope and retry.` : "Code search aborted."; + await recordRetrievalEvidence(projectRoot, { + backend: "codebase_search", + sourceKind: "code", + query, + strategy, + scope, + status: timedOut ? "timeout" : "aborted", + hitCount: 0, + elapsedMs: Date.now() - startedAt, + cachePath: runtimeDirs.searchCache, + error: text, + result: { + siftBin, + timeoutMs, + }, + }); return { content: [ { @@ -2396,6 +2425,23 @@ export default function (pi) { : err ? `\n\nsift stderr: ${err.slice(0, 500)}` : ""; + await recordRetrievalEvidence(projectRoot, { + backend: "codebase_search", + sourceKind: "code", + query, + strategy, + scope, + status: "error", + hitCount: 0, + elapsedMs: Date.now() - startedAt, + cachePath: runtimeDirs.searchCache, + error: err || `exit ${exitCode}`, + result: { + siftBin, + exitCode, + timeoutMs, + }, + }); return { content: [ { @@ -2415,6 +2461,24 @@ export default function (pi) { }, }; } + await recordRetrievalEvidence(projectRoot, { + backend: "codebase_search", + sourceKind: "code", + query, + strategy, + scope, + status: exitCode === 0 ? "ok" : "partial", + hitCount: out.trim() ? 1 : 0, + elapsedMs: Date.now() - startedAt, + cachePath: runtimeDirs.searchCache, + error: err || null, + result: { + siftBin, + exitCode, + timeoutMs, + outputPreview: out.slice(0, 2_000), + }, + }); return { content: [ {