import type { Server } from "node:http";
import {
  type Context,
  getModels,
  type StreamOptions,
  stream,
} from "@singularity-forge/ai";
import express from "express";
import type { AuthStorage } from "../core/auth-storage.js";
import type { ModelRegistry } from "../core/model-registry.js";

export type ProxyServerOptions = {
  port: number;
  authStorage: AuthStorage;
  modelRegistry: ModelRegistry;
  /** Per-family provider priority overrides from settings.proxy.providerPriority */
  priorityOverrides?: Record<string, string[]>;
  onLog?: (msg: string) => void;
};

// Per-family provider priority for bare model ID resolution. When the same model ID
// exists across multiple providers, the first matching family rule wins; within that
// rule providers are tried in order, preferring those with auth configured. Providers
// not listed in any rule fall back to insertion order.
const PROXY_FAMILY_PRIORITY: Array<{ match: RegExp; providers: string[] }> = [
  // MiniMax: international direct > CN endpoint
  { match: /^MiniMax-/i, providers: ["minimax", "minimax-cn"] },
  // GLM: zai is the canonical direct provider > opencode aggregators
  { match: /^glm-/i, providers: ["zai", "opencode", "opencode-go"] },
  // Kimi: kimi-coding direct > opencode aggregators
  { match: /^kimi-/i, providers: ["kimi-coding", "opencode", "opencode-go"] },
  // Gemini/Gemma: proxy bare model IDs through cli-core only.
  { match: /^gemini-|^gemma-/i, providers: ["google-gemini-cli"] },
  // Claude: anthropic direct > opencode. Copilot is disabled.
  { match: /^claude-/i, providers: ["anthropic", "opencode"] },
  // GPT/OpenAI: openai direct > azure. Copilot is disabled.
  {
    match: /^gpt-|^o[0-9]|^codex-/i,
    providers: ["openai", "azure-openai-responses"],
  },
];

function _sortByFamilyPriority<T extends { id: string; provider: string }>(
  models: T[],
): T[] {
  if (models.length <= 1) return models;
  const [first] = models;
  const rule = PROXY_FAMILY_PRIORITY.find((r) => r.match.test(first.id));
  const order = rule?.providers ?? [];
  return [...models].sort((a, b) => {
    const pa = order.indexOf(a.provider);
    const pb = order.indexOf(b.provider);
    return (pa === -1 ? Infinity : pa) - (pb === -1 ? Infinity : pb);
  });
}

export class ProxyServer {
  private server: Server | null = null;

  constructor(private options: ProxyServerOptions) {}

  async start(): Promise<void> {
    if (this.server) return;
    const app = express();
    app.use(express.json());

    const { authStorage, modelRegistry, onLog } = this.options;
    const priorityOverrides = this.options.priorityOverrides ?? {};
    const log = (msg: string) => onLog?.(msg);

    // 1. Model Listing
    app.get(["/v1/models", "/v1beta/models"], async (req, res) => {
      const providers = ["google-gemini-cli", "anthropic", "openai"];
      const allModels = providers.flatMap((p) => getModels(p as any));
      const formatted = allModels.map((m) => ({
        id: m.id,
        object: "model",
        created: 1677610602,
        owned_by: m.provider,
        name: m.name,
        capabilities: m.capabilities,
      }));
      if (req.path.startsWith("/v1beta")) {
        res.json({ models: formatted });
      } else {
        res.json({ data: formatted, object: "list" });
      }
    });

    // 2. Chat Completions (OpenAI & GenAI)
    const handleChat = async (req: express.Request, res: express.Response) => {
      const body = req.body;
      const isOpenAi = req.path.includes("/v1/chat/completions");
      const modelId = isOpenAi
        ? body.model
        : req.params.modelId?.replace(/:streamGenerateContent$/, "");

      if (!modelId) {
        return res.status(400).json({ error: "Model ID is required" });
      }

      try {
        const candidates = modelRegistry.getModelsForProxy(
          modelId,
          priorityOverrides,
        );
        if (candidates.length === 0) {
          return res.status(404).json({ error: `Model ${modelId} not found` });
        }

        // Normalize messages once — shared across retry attempts
        const context: Context = isOpenAi
          ? this.normalizeOpenAi(body)
          : this.normalizeGoogle(body);
        const streamOptions: StreamOptions = {
          temperature: body.temperature,
          maxTokens: isOpenAi
            ? body.max_tokens
            : body.generationConfig?.maxOutputTokens,
        };

        for (const resolvedModel of candidates) {
          const apiKey = await authStorage.getApiKey(resolvedModel.provider);
          if (!apiKey) continue; // no credentials — try next

          const streamOptionsWithKey: StreamOptions = {
            ...streamOptions,
            apiKey,
          };

          try {
            const eventStream = stream(
              resolvedModel as any,
              context,
              streamOptionsWithKey as any,
            );
            if (body.stream) {
              this.handleStreamingResponse(eventStream, res, isOpenAi, modelId);
            } else {
              await this.handleStaticResponse(
                eventStream,
                res,
                isOpenAi,
                modelId,
              );
            }
            return; // success
          } catch (err: any) {
            const status = err?.status ?? err?.statusCode;
            if (status === 429) {
              log(
                `Provider ${resolvedModel.provider} rate-limited (429), trying next candidate`,
              );
              continue;
            }
            throw err;
          }
        }

        // All candidates exhausted
        res
          .status(429)
          .json({ error: `All providers rate-limited for model ${modelId}` });
      } catch (err: any) {
        log(`Proxy error: ${err.message}`);
        res.status(500).json({ error: err.message });
      }
    };

    app.post("/v1/chat/completions", handleChat);
    app.post("/v1beta/models/:modelId\\:streamGenerateContent", handleChat);

    return new Promise<void>((resolve) => {
      this.server = app.listen(this.options.port, () => {
        log(`Proxy Server running on http://localhost:${this.options.port}`);
        resolve();
      });
    });
  }

  stop(): void {
    if (this.server) {
      this.server.close();
      this.server = null;
    }
  }

  // Flattens an OpenAI-style body: the first system message becomes the system
  // prompt; remaining messages become the history.
  private normalizeOpenAi(body: any): Context {
    const messages = body.messages || [];
    const system = messages.find((m: any) => m.role === "system")?.content;
    const history = messages
      .filter((m: any) => m.role !== "system")
      .map((m: any) => ({
        role: m.role === "user" ? "user" : "assistant",
        content:
          typeof m.content === "string"
            ? [{ type: "text", text: m.content }]
            : m.content,
      }));
    return { messages: history, systemPrompt: system };
  }

  // Flattens a GenAI-style body: `contents`/`parts` become the history and
  // `systemInstruction` becomes the system prompt.
  private normalizeGoogle(body: any): Context {
    const contents = body.contents || [];
    const history = contents.map((c: any) => ({
      role: c.role === "user" ? "user" : "assistant",
      content: (c.parts || []).map((p: any) => ({
        type: "text",
        text: p.text,
      })),
    }));
    const system = body.systemInstruction?.parts?.[0]?.text;
    return { messages: history, systemPrompt: system };
  }

  // Streams deltas as SSE chunks (OpenAI) or newline-delimited JSON (GenAI).
  private handleStreamingResponse(
    eventStream: any,
    res: express.Response,
    isOpenAi: boolean,
    modelId: string,
  ) {
    res.setHeader(
      "Content-Type",
      isOpenAi ? "text/event-stream" : "application/json",
    );
    eventStream.on("data", (ev: any) => {
      if (ev.type === "text_delta") {
        if (isOpenAi) {
          const chunk = {
            id: `chatcmpl-${Date.now()}`,
            object: "chat.completion.chunk",
            created: Math.floor(Date.now() / 1000),
            model: modelId,
            choices: [
              { index: 0, delta: { content: ev.delta }, finish_reason: null },
            ],
          };
          res.write(`data: ${JSON.stringify(chunk)}\n\n`);
        } else {
          const chunk = {
            candidates: [{ content: { parts: [{ text: ev.delta }] } }],
          };
          res.write(JSON.stringify(chunk) + "\n");
        }
      }
    });
    eventStream.on("done", () => {
      if (isOpenAi) res.write("data: [DONE]\n\n");
      res.end();
    });
    eventStream.on("error", (ev: any) => {
      if (!res.headersSent)
        res.status(500).json({ error: ev.error.errorMessage });
      else res.end();
    });
  }

  // Buffers all deltas, then sends a single non-streaming JSON response.
  private async handleStaticResponse(
    eventStream: any,
    res: express.Response,
    isOpenAi: boolean,
    modelId: string,
  ): Promise<void> {
    let fullContent = "";
    eventStream.on("data", (ev: any) => {
      if (ev.type === "text_delta") fullContent += ev.delta;
    });
    return new Promise<void>((resolve) => {
      eventStream.on("done", () => {
        if (isOpenAi) {
          res.json({
            id: `chatcmpl-${Date.now()}`,
            object: "chat.completion",
            created: Math.floor(Date.now() / 1000),
            model: modelId,
            choices: [
              {
                index: 0,
                message: { role: "assistant", content: fullContent },
                finish_reason: "stop",
              },
            ],
          });
        } else {
          res.json({
            candidates: [{ content: { parts: [{ text: fullContent }] } }],
          });
        }
        resolve();
      });
      eventStream.on("error", (ev: any) => {
        res.status(500).json({ error: ev.error.errorMessage });
        resolve();
      });
    });
  }
}