diff --git a/docs/README.md b/docs/README.md
index c6c3b4692..e2065fe76 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -11,7 +11,8 @@ Welcome to the GSD documentation. This covers everything from getting started to
 | [Commands Reference](./commands.md) | All commands, keyboard shortcuts, and CLI flags |
 | [Remote Questions](./remote-questions.md) | Discord and Slack integration for headless auto-mode |
 | [Configuration](./configuration.md) | Preferences, model selection, git settings, and token profiles |
-| [Custom Models](./custom-models.md) | Add custom providers (Ollama, vLLM, LM Studio, proxies) via models.json |
+| [Provider Setup](./providers.md) | Step-by-step setup for OpenRouter, Ollama, LM Studio, vLLM, and all supported providers |
+| [Custom Models](./custom-models.md) | Advanced model configuration — models.json schema, compat flags, overrides |
 | [Token Optimization](./token-optimization.md) | Token profiles, context compression, complexity routing, and adaptive learning (v2.17) |
 | [Dynamic Model Routing](./dynamic-model-routing.md) | Complexity-based model selection, cost tables, escalation, and budget pressure (v2.19) |
 | [Captures & Triage](./captures-triage.md) | Fire-and-forget thought capture during auto-mode with automated triage (v2.19) |
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 4c2392556..6fbcf2422 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -58,6 +58,8 @@ GSD displays a welcome screen showing your version, active model, and available
 
 If you have an existing Pi installation, provider credentials are imported automatically.
 
+For detailed setup instructions for specific providers (OpenRouter, Ollama, LM Studio, vLLM, and more), see the [Provider Setup Guide](./providers.md).
+
 Re-run the wizard anytime with:
 
 ```bash
diff --git a/docs/providers.md b/docs/providers.md
new file mode 100644
index 000000000..984ee369c
--- /dev/null
+++ b/docs/providers.md
@@ -0,0 +1,627 @@
+# Provider Setup Guide
+
+Step-by-step setup instructions for every LLM provider GSD supports. If you ran the onboarding wizard (`gsd config`) and picked a provider, you may already be configured — check with `/model` inside a session.
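+
+For a quick look at what is currently active (a sketch; the exact picker UI may differ by version):
+
+```bash
+gsd        # launch a session
+# then type /model; the highlighted entry is the active provider and model
+```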
+
+## Table of Contents
+
+- [Quick Reference](#quick-reference)
+- [Built-in Providers](#built-in-providers)
+  - [Anthropic (Claude)](#anthropic-claude)
+  - [OpenAI](#openai)
+  - [Google Gemini](#google-gemini)
+  - [OpenRouter](#openrouter)
+  - [Groq](#groq)
+  - [xAI (Grok)](#xai-grok)
+  - [Mistral](#mistral)
+  - [GitHub Copilot](#github-copilot)
+  - [Amazon Bedrock](#amazon-bedrock)
+  - [Anthropic on Vertex AI](#anthropic-on-vertex-ai)
+  - [Azure OpenAI](#azure-openai)
+- [Local Providers](#local-providers)
+  - [Ollama](#ollama)
+  - [LM Studio](#lm-studio)
+  - [vLLM](#vllm)
+  - [SGLang](#sglang)
+- [Custom OpenAI-Compatible Endpoints](#custom-openai-compatible-endpoints)
+- [Common Pitfalls](#common-pitfalls)
+- [Verifying Your Setup](#verifying-your-setup)
+
+## Quick Reference
+
+| Provider | Auth Method | Env Variable | Config File |
+|----------|-------------|-------------|-------------|
+| Anthropic | OAuth or API key | `ANTHROPIC_API_KEY` | — |
+| OpenAI | API key | `OPENAI_API_KEY` | — |
+| Google Gemini | API key | `GEMINI_API_KEY` | — |
+| OpenRouter | API key | `OPENROUTER_API_KEY` | Optional `models.json` |
+| Groq | API key | `GROQ_API_KEY` | — |
+| xAI | API key | `XAI_API_KEY` | — |
+| Mistral | API key | `MISTRAL_API_KEY` | — |
+| GitHub Copilot | OAuth | `GH_TOKEN` | — |
+| Amazon Bedrock | IAM credentials | `AWS_PROFILE` or `AWS_ACCESS_KEY_ID` | — |
+| Vertex AI | ADC | `GOOGLE_APPLICATION_CREDENTIALS` | — |
+| Azure OpenAI | API key | `AZURE_OPENAI_API_KEY` | — |
+| Ollama | None (local) | — | `models.json` required |
+| LM Studio | None (local) | — | `models.json` required |
+| vLLM / SGLang | None (local) | — | `models.json` required |
+
+---
+
+## Built-in Providers
+
+Built-in providers have models pre-registered in GSD. You only need to supply credentials.
+
+### Anthropic (Claude)
+
+**Recommended.** Anthropic models have the deepest integration: built-in web search, extended thinking, and prompt caching.
+
+**Option A — Browser sign-in (recommended):**
+
+```bash
+gsd config
+# Choose "Sign in with your browser" → "Anthropic (Claude)"
+```
+
+Or inside a session: `/login`
+
+**Option B — API key:**
+
+```bash
+export ANTHROPIC_API_KEY="sk-ant-..."
+```
+
+Or paste it during `gsd config` when prompted.
+
+**Get a key:** [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys)
+
+### OpenAI
+
+```bash
+export OPENAI_API_KEY="sk-..."
+```
+
+Or run `gsd config` and choose "Paste an API key", then "OpenAI".
+
+**Get a key:** [platform.openai.com/api-keys](https://platform.openai.com/api-keys)
+
+### Google Gemini
+
+```bash
+export GEMINI_API_KEY="..."
+```
+
+**Get a key:** [aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey)
+
+### OpenRouter
+
+OpenRouter aggregates 200+ models from multiple providers behind a single API key.
+
+**Step 1 — Get your API key:**
+
+Go to [openrouter.ai/keys](https://openrouter.ai/keys) and create a key.
+
+**Step 2 — Set the key:**
+
+```bash
+export OPENROUTER_API_KEY="sk-or-..."
+```
+
+Or run `gsd config`, choose "Paste an API key", then "OpenRouter".
+
+**Step 3 — Switch to an OpenRouter model:**
+
+Inside a GSD session, type `/model` and select an OpenRouter model. Models are prefixed with `openrouter/` (e.g., `openrouter/anthropic/claude-sonnet-4`).
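+
+**Optional — Sanity-check the key:**
+
+Because OpenRouter exposes an OpenAI-compatible API, a plain `curl` against its model-list endpoint confirms the key works (a quick probe, not required for setup):
+
+```bash
+curl -s https://openrouter.ai/api/v1/models \
+  -H "Authorization: Bearer $OPENROUTER_API_KEY" | head -c 300
+# A JSON model list means the key is valid; an error object means it is not.
+```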
+
+**Optional — Add custom OpenRouter models via `models.json`:**
+
+If you want models not in the built-in list, add them to `~/.gsd/agent/models.json`:
+
+```json
+{
+  "providers": {
+    "openrouter": {
+      "baseUrl": "https://openrouter.ai/api/v1",
+      "apiKey": "OPENROUTER_API_KEY",
+      "api": "openai-completions",
+      "models": [
+        {
+          "id": "meta-llama/llama-3.3-70b",
+          "name": "Llama 3.3 70B (OpenRouter)",
+          "reasoning": false,
+          "input": ["text"],
+          "contextWindow": 131072,
+          "maxTokens": 32768,
+          "cost": { "input": 0.3, "output": 0.3, "cacheRead": 0, "cacheWrite": 0 }
+        }
+      ]
+    }
+  }
+}
+```
+
+Note: the `apiKey` field here is the *name* of the environment variable, not the literal key. GSD resolves it automatically. You can also use a literal value or a shell command (see [Value Resolution](./custom-models.md#value-resolution)).
+
+**Optional — Route through specific providers:**
+
+Use `modelOverrides` to control which upstream provider OpenRouter uses:
+
+```json
+{
+  "providers": {
+    "openrouter": {
+      "modelOverrides": {
+        "anthropic/claude-sonnet-4": {
+          "compat": {
+            "openRouterRouting": {
+              "only": ["amazon-bedrock"]
+            }
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+### Groq
+
+```bash
+export GROQ_API_KEY="gsk_..."
+```
+
+**Get a key:** [console.groq.com/keys](https://console.groq.com/keys)
+
+### xAI (Grok)
+
+```bash
+export XAI_API_KEY="xai-..."
+```
+
+**Get a key:** [console.x.ai](https://console.x.ai)
+
+### Mistral
+
+```bash
+export MISTRAL_API_KEY="..."
+```
+
+**Get a key:** [console.mistral.ai/api-keys](https://console.mistral.ai/api-keys)
+
+### GitHub Copilot
+
+Uses OAuth — sign in through the browser:
+
+```bash
+gsd config
+# Choose "Sign in with your browser" → "GitHub Copilot"
+```
+
+Requires an active GitHub Copilot subscription.
+
+### Amazon Bedrock
+
+Bedrock uses AWS IAM credentials, not API keys. Any of these work:
+
+```bash
+# Option 1: Named profile
+export AWS_PROFILE="my-profile"
+
+# Option 2: IAM keys
+export AWS_ACCESS_KEY_ID="AKIA..."
+export AWS_SECRET_ACCESS_KEY="..."
+export AWS_REGION="us-east-1"
+
+# Option 3: Bedrock API key (bearer token)
+export AWS_BEARER_TOKEN_BEDROCK="..."
+```
+
+ECS task roles and IRSA (Kubernetes) are also detected automatically.
+
+### Anthropic on Vertex AI
+
+Uses Google Cloud Application Default Credentials:
+
+```bash
+gcloud auth application-default login
+export ANTHROPIC_VERTEX_PROJECT_ID="my-project-id"
+```
+
+Or set `GOOGLE_CLOUD_PROJECT` and ensure ADC credentials exist at `~/.config/gcloud/application_default_credentials.json`.
+
+### Azure OpenAI
+
+```bash
+export AZURE_OPENAI_API_KEY="..."
+```
+
+---
+
+## Local Providers
+
+Local providers run on your machine. They require a `models.json` configuration file because GSD needs to know the endpoint URL and which models are available.
+
+**Config file location:** `~/.gsd/agent/models.json`
+
+The file reloads each time you open `/model` — no restart needed.
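+
+Every local provider below fills in the same basic shape. As a minimal skeleton (placeholder names and port; the sections that follow supply real values and compat flags):
+
+```json
+{
+  "providers": {
+    "my-local-server": {
+      "baseUrl": "http://localhost:PORT/v1",
+      "api": "openai-completions",
+      "apiKey": "unused-but-required",
+      "models": [{ "id": "model-id-as-reported-by-the-server" }]
+    }
+  }
+}
+```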
+
+### Ollama
+
+**Step 1 — Install and start Ollama:**
+
+```bash
+# macOS
+brew install ollama
+ollama serve
+
+# Or download from https://ollama.com
+```
+
+**Step 2 — Pull a model:**
+
+```bash
+ollama pull llama3.1:8b
+ollama pull qwen2.5-coder:7b
+```
+
+**Step 3 — Create `~/.gsd/agent/models.json`:**
+
+```json
+{
+  "providers": {
+    "ollama": {
+      "baseUrl": "http://localhost:11434/v1",
+      "api": "openai-completions",
+      "apiKey": "ollama",
+      "compat": {
+        "supportsDeveloperRole": false,
+        "supportsReasoningEffort": false
+      },
+      "models": [
+        { "id": "llama3.1:8b" },
+        { "id": "qwen2.5-coder:7b" }
+      ]
+    }
+  }
+}
+```
+
+The `apiKey` is required by the config schema but Ollama ignores it — any value works.
+
+**Step 4 — Select the model:**
+
+Inside GSD, type `/model` and pick your Ollama model.
+
+**Ollama tips:**
+- Ollama does not support the `developer` role or `reasoning_effort` — always set `compat.supportsDeveloperRole: false` and `compat.supportsReasoningEffort: false`.
+- If you get empty responses, check that `ollama serve` is running and the model is pulled.
+- Context window and max tokens default to 128K / 16K if not specified. Override these if your model has different limits.
+
+### LM Studio
+
+**Step 1 — Install LM Studio:**
+
+Download from [lmstudio.ai](https://lmstudio.ai).
+
+**Step 2 — Start the local server:**
+
+In LM Studio, go to the "Local Server" tab, load a model, and click "Start Server". The default port is 1234.
+
+**Step 3 — Create `~/.gsd/agent/models.json`:**
+
+```json
+{
+  "providers": {
+    "lm-studio": {
+      "baseUrl": "http://localhost:1234/v1",
+      "api": "openai-completions",
+      "apiKey": "lm-studio",
+      "compat": {
+        "supportsDeveloperRole": false,
+        "supportsReasoningEffort": false
+      },
+      "models": [
+        {
+          "id": "your-model-name",
+          "name": "My Local Model",
+          "contextWindow": 32768,
+          "maxTokens": 4096
+        }
+      ]
+    }
+  }
+}
+```
+
+Replace `your-model-name` with the model identifier shown in LM Studio's server tab.
+
+**LM Studio tips:**
+- The model ID in `models.json` must match what LM Studio reports in its server API. Check the server tab for the exact string.
+- LM Studio defaults to port 1234. If you changed it, update `baseUrl` accordingly.
+- Increase `contextWindow` and `maxTokens` if your model supports larger contexts.
+
+### vLLM
+
+```json
+{
+  "providers": {
+    "vllm": {
+      "baseUrl": "http://localhost:8000/v1",
+      "api": "openai-completions",
+      "apiKey": "vllm",
+      "compat": {
+        "supportsDeveloperRole": false,
+        "supportsReasoningEffort": false,
+        "supportsUsageInStreaming": false
+      },
+      "models": [
+        {
+          "id": "meta-llama/Llama-3.1-8B-Instruct",
+          "contextWindow": 128000,
+          "maxTokens": 16384
+        }
+      ]
+    }
+  }
+}
+```
+
+The model `id` must match the `--model` flag you passed to `vllm serve`.
+
+### SGLang
+
+```json
+{
+  "providers": {
+    "sglang": {
+      "baseUrl": "http://localhost:30000/v1",
+      "api": "openai-completions",
+      "apiKey": "sglang",
+      "compat": {
+        "supportsDeveloperRole": false,
+        "supportsReasoningEffort": false
+      },
+      "models": [
+        {
+          "id": "meta-llama/Llama-3.1-8B-Instruct"
+        }
+      ]
+    }
+  }
+}
+```
+
+---
+
+## Custom OpenAI-Compatible Endpoints
+
+Any server that implements the OpenAI Chat Completions API can work with GSD. This covers proxies (LiteLLM, Portkey, Helicone), self-hosted inference, and new providers.
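+
+A quick way to check whether an endpoint speaks the protocol is to request its model list (a generic probe; adjust host, port, and auth header for your server):
+
+```bash
+curl -s http://localhost:8000/v1/models \
+  -H "Authorization: Bearer $MY_PROVIDER_API_KEY"
+# An OpenAI-compatible server replies with {"object":"list","data":[...]}
+```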
+
+**Quickest path — use the onboarding wizard:**
+
+```bash
+gsd config
+# Choose "Paste an API key" → "Custom (OpenAI-compatible)"
+# Enter: base URL, API key, model ID
+```
+
+This writes `~/.gsd/agent/models.json` for you automatically.
+
+**Manual setup:**
+
+```json
+{
+  "providers": {
+    "my-provider": {
+      "baseUrl": "https://my-endpoint.example.com/v1",
+      "apiKey": "MY_PROVIDER_API_KEY",
+      "api": "openai-completions",
+      "models": [
+        {
+          "id": "model-id-here",
+          "name": "Friendly Model Name",
+          "reasoning": false,
+          "input": ["text"],
+          "contextWindow": 128000,
+          "maxTokens": 16384,
+          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
+        }
+      ]
+    }
+  }
+}
+```
+
+**Adding custom headers (for proxies):**
+
+```json
+{
+  "providers": {
+    "litellm-proxy": {
+      "baseUrl": "https://litellm.example.com/v1",
+      "apiKey": "MY_API_KEY",
+      "api": "openai-completions",
+      "headers": {
+        "x-custom-header": "value"
+      },
+      "models": [...]
+    }
+  }
+}
+```
+
+**Qwen models with thinking mode:**
+
+For Qwen-compatible servers, use `thinkingFormat` to enable thinking mode:
+
+```json
+{
+  "compat": {
+    "thinkingFormat": "qwen",
+    "supportsDeveloperRole": false
+  }
+}
+```
+
+Use `"qwen-chat-template"` instead if the server requires `chat_template_kwargs.enable_thinking`.
+
+For the full reference on `compat` fields, `modelOverrides`, value resolution, and advanced configuration, see [Custom Models](./custom-models.md).
+
+---
+
+## Common Pitfalls
+
+### "Authentication failed" with a valid key
+
+**Cause:** The key is set in your shell but not visible to GSD.
+
+**Fix:** Make sure the environment variable is exported in the same terminal where you run `gsd`. Or use `gsd config` to save the key to `~/.gsd/agent/auth.json` so it persists across sessions.
+
+### OpenRouter models not appearing in `/model`
+
+**Cause:** No `OPENROUTER_API_KEY` set, so GSD hides OpenRouter models.
+
+**Fix:** Set the key and restart GSD:
+
+```bash
+export OPENROUTER_API_KEY="sk-or-..."
+gsd
+```
+
+### Ollama returns empty responses
+
+**Cause:** The Ollama server isn't running, or the model isn't pulled.
+
+**Fix:**
+
+```bash
+# Verify the server is running
+curl http://localhost:11434/v1/models
+
+# Pull the model if missing
+ollama pull llama3.1:8b
+```
+
+### LM Studio model ID mismatch
+
+**Cause:** The `id` in `models.json` doesn't match what LM Studio exposes via its API.
+
+**Fix:** Check the LM Studio server tab for the exact model identifier. It often includes the filename or quantization level (e.g., `lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF`).
+
+### `developer` role error with local models
+
+**Cause:** Most local inference servers don't support the OpenAI `developer` message role.
+
+**Fix:** Add `compat.supportsDeveloperRole: false` to the provider config. This makes GSD send `system` messages instead:
+
+```json
+{
+  "compat": {
+    "supportsDeveloperRole": false,
+    "supportsReasoningEffort": false
+  }
+}
+```
+
+### `stream_options` error with local models
+
+**Cause:** Some servers don't support `stream_options: { include_usage: true }`.
+
+**Fix:** Add `compat.supportsUsageInStreaming: false`:
+
+```json
+{
+  "compat": {
+    "supportsUsageInStreaming": false
+  }
+}
+```
+
+### "apiKey is required" validation error
+
+**Cause:** `models.json` schema requires `apiKey` when `models` are defined.
+
+**Fix:** For local servers that don't need auth, set a dummy value:
+
+```json
+"apiKey": "not-needed"
+```
+
+### Cost shows $0.00 for custom models
+
+**Expected behavior.** GSD defaults cost to zero for custom models. Override with the `cost` field if you want accurate cost tracking:
+
+```json
+"cost": { "input": 0.15, "output": 0.60, "cacheRead": 0.015, "cacheWrite": 0.19 }
+```
+
+Values are per million tokens.
+
+---
+
+## Verifying Your Setup
+
+After configuring a provider:
+
+1. **Launch GSD:**
+   ```bash
+   gsd
+   ```
+
+2. **Check available models:**
+   ```
+   /model
+   ```
+   Your provider's models should appear in the list.
+
+3. **Switch to the model:**
+   Select it from the `/model` picker.
+
+4. **Send a test message:**
+   Type anything to confirm the model responds.
+
+If the model doesn't appear, check:
+- The environment variable is set in the current shell
+- `models.json` is valid JSON (use `cat ~/.gsd/agent/models.json | python3 -m json.tool`)
+- The server is running (for local providers)
+
+For additional help, see [Troubleshooting](./troubleshooting.md) or run `/gsd doctor` inside a session.
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index aef19f982..875bba7fc 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -97,6 +97,8 @@ models:
 
 **Headless mode:** `gsd headless auto` auto-restarts the entire process on crash (default 3 attempts with exponential backoff). Combined with provider error auto-resume, this enables true overnight unattended execution.
 
+For common provider setup issues (role errors, streaming errors, model ID mismatches), see the [Provider Setup Guide — Common Pitfalls](./providers.md#common-pitfalls).
+
 ### Budget ceiling reached
 
 **Symptoms:** Auto mode pauses with "Budget ceiling reached."
diff --git a/src/help-text.ts b/src/help-text.ts
index 82f262268..ab534ae62 100644
--- a/src/help-text.ts
+++ b/src/help-text.ts
@@ -3,12 +3,15 @@ const SUBCOMMAND_HELP: Record<string, string> = {
     'Usage: gsd config',
     '',
     'Re-run the interactive setup wizard to configure:',
-    '  - LLM provider (Anthropic, OpenAI, Google, etc.)',
+    '  - LLM provider (Anthropic, OpenAI, Google, OpenRouter, Ollama, LM Studio, etc.)',
     '  - Web search provider (Brave, Tavily, built-in)',
     '  - Remote questions (Discord, Slack, Telegram)',
    '  - Tool API keys (Context7, Jina, Groq)',
     '',
     'All steps are skippable and can be changed later with /login or /search-provider.',
+    '',
+    'For detailed provider setup instructions (OpenRouter, Ollama, LM Studio, vLLM,',
+    'and other OpenAI-compatible endpoints), see docs/providers.md.',
   ].join('\n'),
 
   update: [
diff --git a/src/onboarding.ts b/src/onboarding.ts
index 6b21d94d6..4ae69c141 100644
--- a/src/onboarding.ts
+++ b/src/onboarding.ts
@@ -86,14 +86,13 @@ const API_KEY_PREFIXES: Record<string, string> = {
 }
 
 const OTHER_PROVIDERS = [
-  { value: 'google', label: 'Google (Gemini)' },
-  { value: 'groq', label: 'Groq' },
-  { value: 'xai', label: 'xAI (Grok)' },
-  { value: 'openrouter', label: 'OpenRouter' },
-  { value: 'mistral', label: 'Mistral' },
-  { value: 'ollama', label: 'Ollama (Local)' },
+  { value: 'google', label: 'Google (Gemini)', hint: 'aistudio.google.com/app/apikey' },
+  { value: 'groq', label: 'Groq', hint: 'console.groq.com/keys' },
+  { value: 'xai', label: 'xAI (Grok)', hint: 'console.x.ai' },
+  { value: 'openrouter', label: 'OpenRouter', hint: '200+ models — openrouter.ai/keys' },
+  { value: 'mistral', label: 'Mistral', hint: 'console.mistral.ai/api-keys' },
   { value: 'ollama-cloud', label: 'Ollama Cloud' },
-  { value: 'custom-openai', label: 'Custom (OpenAI-compatible)' },
+  { value: 'custom-openai', label: 'Custom (OpenAI-compatible)', hint: 'Ollama, LM Studio, vLLM, proxies — see docs/providers.md' },
 ]
 
 // ─── Dynamic imports ──────────────────────────────────────────────────────────
@@ -446,6 +445,13 @@ async function runApiKeyFlow(
 
   authStorage.set(providerId, { type: 'api_key', key: trimmed })
   p.log.success(`API key saved for ${pc.green(providerLabel)}`)
+
+  // Provider-specific post-setup hints
+  if (providerId === 'openrouter') {
+    p.log.info(`Use ${pc.cyan('/model')} inside GSD to pick an OpenRouter model.`)
+    p.log.info(`To add custom models or control routing, see ${pc.dim('docs/providers.md#openrouter')}`)
+  }
+
   return true
 }
 
@@ -504,10 +510,12 @@ async function runCustomOpenAIFlow(
   pc: PicoModule,
   authStorage: AuthStorage,
 ): Promise<boolean> {
+  p.log.info(pc.dim('Common endpoints:\n  Ollama:    http://localhost:11434/v1\n  LM Studio: http://localhost:1234/v1\n  vLLM:      http://localhost:8000/v1'))
+
   // Prompt for base URL
   const baseUrl = await p.text({
     message: 'Base URL of your OpenAI-compatible endpoint:',
-    placeholder: 'https://my-proxy.example.com/v1',
+    placeholder: 'http://localhost:11434/v1',
     validate: (val) => {
       const trimmed = val?.trim()
       if (!trimmed) return 'Base URL is required'
@@ -588,6 +596,8 @@ async function runCustomOpenAIFlow(
   p.log.success(`Custom endpoint saved: ${pc.green(trimmedUrl)}`)
   p.log.info(`Model: ${pc.cyan(trimmedModelId)}`)
   p.log.info(`Config written to ${pc.dim(modelsJsonPath)}`)
+  p.log.info(`If you get role or streaming errors, add compat settings to models.json.`)
+  p.log.info(`See ${pc.dim('docs/providers.md#common-pitfalls')} for details.`)
 
   return true
 }
diff --git a/src/tests/provider-help-text.test.ts b/src/tests/provider-help-text.test.ts
new file mode 100644
index 000000000..e66b9b3a6
--- /dev/null
+++ b/src/tests/provider-help-text.test.ts
@@ -0,0 +1,40 @@
+import { describe, it } from "node:test";
+import assert from "node:assert/strict";
+
+// Validate that help-text.ts includes updated provider references
+const { printSubcommandHelp } = await import("../../dist/help-text.js");
+
+describe("help-text provider references", () => {
+  it("config help mentions OpenRouter and Ollama", () => {
+    const lines: string[] = [];
+    const origWrite = process.stdout.write.bind(process.stdout);
+    (process.stdout as any).write = (chunk: string) => { lines.push(chunk); return true; };
+    try {
+      printSubcommandHelp("config", "0.0.0");
+    } finally {
+      (process.stdout as any).write = origWrite;
+    }
+    const text = lines.join("");
+    assert.ok(text.includes("OpenRouter"), "OpenRouter should be mentioned in config help");
+    assert.ok(text.includes("Ollama"), "Ollama should be mentioned in config help");
+    assert.ok(text.includes("docs/providers.md"), "providers.md reference should be in config help");
+  });
+});
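+
+// Hedged companion check (same capture pattern as above; assumes the new
+// config help text, which also names vLLM and LM Studio, ships unchanged).
+describe("help-text local provider references", () => {
+  it("config help mentions vLLM and LM Studio", () => {
+    const lines: string[] = [];
+    const origWrite = process.stdout.write.bind(process.stdout);
+    (process.stdout as any).write = (chunk: string) => { lines.push(chunk); return true; };
+    try {
+      printSubcommandHelp("config", "0.0.0");
+    } finally {
+      (process.stdout as any).write = origWrite;
+    }
+    const text = lines.join("");
+    assert.ok(text.includes("vLLM"), "vLLM should be mentioned in config help");
+    assert.ok(text.includes("LM Studio"), "LM Studio should be mentioned in config help");
+  });
+});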