// singularity-forge/packages/coding-agent/src/utils/proxy-server.ts

import type { Server } from "node:http";
import {
	type Context,
	getModels,
	type StreamOptions,
	stream,
} from "@singularity-forge/ai";
import express from "express";
import type { AuthStorage } from "../core/auth-storage.js";
import type { ModelRegistry } from "../core/model-registry.js";

/** Configuration accepted by the `ProxyServer` constructor. */
export type ProxyServerOptions = {
	/** TCP port the Express app listens on. */
	port: number;
	/** Queried with `getApiKey(provider)` for each candidate model; candidates without a key are skipped. */
	authStorage: AuthStorage;
	/** Resolves a requested model ID to an ordered candidate list via `getModelsForProxy`. */
	modelRegistry: ModelRegistry;
	/** Per-family provider priority overrides from settings.proxy.providerPriority */
	priorityOverrides?: Record<string, string[]>;
	/** Optional sink for log lines; when omitted, log messages are discarded. */
	onLog?: (msg: string) => void;
};

// Per-family provider priority for bare model ID resolution. When the same model ID
// exists across multiple providers, the first rule whose `match` regex accepts the
// model ID wins (all patterns are anchored at the start of the ID and are
// case-insensitive); within that rule, providers are ranked in the listed order.
// Providers not listed in the winning rule keep their original relative order after
// the listed ones.
// NOTE(review): the original comment also says providers "with auth configured" are
// preferred — `_sortByFamilyPriority` in this file does not look at auth, so that
// preference is presumably applied by the caller; confirm.
const PROXY_FAMILY_PRIORITY: Array<{ match: RegExp; providers: string[] }> = [
	// MiniMax: international direct > CN endpoint
	{ match: /^MiniMax-/i, providers: ["minimax", "minimax-cn"] },
	// GLM: zai is the canonical direct provider > opencode aggregators
	{ match: /^glm-/i, providers: ["zai", "opencode", "opencode-go"] },
	// Kimi: kimi-coding direct > opencode aggregators
	{ match: /^kimi-/i, providers: ["kimi-coding", "opencode", "opencode-go"] },
	// Gemini/Gemma: proxy bare model IDs through cli-core only.
	{
		match: /^gemini-|^gemma-/i,
		providers: ["google-gemini-cli"],
	},
	// Claude: anthropic direct > opencode. Copilot is disabled.
	{
		match: /^claude-/i,
		providers: ["anthropic", "opencode"],
	},
	// GPT/OpenAI: openai direct > azure. Copilot is disabled.
	// Matches IDs starting with "gpt-", "codex-", or "o" followed by a digit (e.g. "o1", "o3").
	{
		match: /^gpt-|^o[0-9]|^codex-/i,
		providers: ["openai", "azure-openai-responses"],
	},
];

/**
 * Orders models that share a model ID by provider priority.
 *
 * The family rule is chosen from the first element's `id` (first rule in
 * `PROXY_FAMILY_PRIORITY` whose regex matches). Providers listed in that rule
 * come first, in rule order; unlisted providers follow in their original
 * relative order. When no rule matches, the original order is preserved.
 *
 * @param models Candidate models; expected to share the same `id`.
 * @returns The input array itself when it has fewer than two entries,
 *          otherwise a sorted copy (the input is not mutated).
 */
function _sortByFamilyPriority<T extends { id: string; provider: string }>(
	models: T[],
): T[] {
	if (models.length < 2) return models;

	const familyId = models[0].id;
	let ranking: string[] = [];
	for (const rule of PROXY_FAMILY_PRIORITY) {
		if (rule.match.test(familyId)) {
			ranking = rule.providers;
			break;
		}
	}

	// Unlisted providers rank after every listed one.
	const rankOf = (provider: string): number => {
		const position = ranking.indexOf(provider);
		return position < 0 ? Number.POSITIVE_INFINITY : position;
	};

	const ordered = models.slice();
	ordered.sort((left, right) => {
		const leftRank = rankOf(left.provider);
		const rightRank = rankOf(right.provider);
		// Equal ranks (including two unlisted providers) are ties; the stable
		// sort keeps their original relative order.
		return leftRank === rightRank ? 0 : leftRank - rightRank;
	});
	return ordered;
}

/**
 * Local HTTP proxy that exposes OpenAI-style (`/v1/...`) and Google
 * GenAI-style (`/v1beta/...`) endpoints and forwards chat requests to
 * `stream()` from `@singularity-forge/ai`.
 */
export class ProxyServer {
	private server: Server | null = null;

	constructor(private options: ProxyServerOptions) {}

	/**
	 * Builds the Express app, registers the routes and starts listening on
	 * `options.port`. Does nothing if the server was already started.
	 *
	 * @returns A promise that resolves once the server is listening and
	 *          rejects if the listener fails to start (e.g. port in use).
	 */
	async start(): Promise<void> {
		if (this.server) return;

		const app = express();
		app.use(express.json());

		const { authStorage, modelRegistry, onLog } = this.options;
		const priorityOverrides = this.options.priorityOverrides ?? {};

		const log = (msg: string) => onLog?.(msg);

		// 1. Model Listing
		// Deliberately synchronous: a throw from getModels is then caught by
		// Express instead of becoming an unhandled promise rejection.
		app.get(["/v1/models", "/v1beta/models"], (req, res) => {
			const providers = ["google-gemini-cli", "anthropic", "openai"];
			const allModels = providers.flatMap((p) => getModels(p as any));

			const formatted = allModels.map((m) => ({
				id: m.id,
				object: "model",
				created: 1677610602,
				owned_by: m.provider,
				name: m.name,
				capabilities: m.capabilities,
			}));

			if (req.path.startsWith("/v1beta")) {
				res.json({ models: formatted });
			} else {
				res.json({ data: formatted, object: "list" });
			}
		});

		// 2. Chat Completions (OpenAI & GenAI)
		const handleChat = async (req: express.Request, res: express.Response) => {
			// Everything runs inside the try so this async handler never rejects;
			// a rejected handler would otherwise leave the request hanging.
			try {
				// req.body may be undefined when no JSON body was parsed.
				const body = req.body ?? {};
				const isOpenAi = req.path.includes("/v1/chat/completions");
				const modelId = isOpenAi
					? body.model
					: req.params.modelId?.replace(/:streamGenerateContent$/, "");

				if (typeof modelId !== "string" || modelId === "") {
					return res.status(400).json({ error: "Model ID is required" });
				}

				const candidates = modelRegistry.getModelsForProxy(
					modelId,
					priorityOverrides,
				);
				if (candidates.length === 0) {
					return res.status(404).json({ error: `Model ${modelId} not found` });
				}

				// Normalize messages once — shared across retry attempts
				const context: Context = isOpenAi
					? this.normalizeOpenAi(body)
					: this.normalizeGoogle(body);

				const streamOptions: StreamOptions = {
					temperature: body.temperature,
					maxTokens: isOpenAi
						? body.max_tokens
						: body.generationConfig?.maxOutputTokens,
				};

				// Tracks why the loop fell through so the final error is accurate.
				let sawRateLimit = false;

				for (const resolvedModel of candidates) {
					const apiKey = await authStorage.getApiKey(resolvedModel.provider);
					if (!apiKey) continue; // no credentials — try next

					const streamOptionsWithKey: StreamOptions = {
						...streamOptions,
						apiKey,
					};

					try {
						const eventStream = stream(
							resolvedModel as any,
							context,
							streamOptionsWithKey as any,
						);

						// NOTE(review): `stream` is an OpenAI request field. Google
						// `:streamGenerateContent` requests therefore only stream when
						// the client also sends `stream: true` — confirm this is intended.
						if (body.stream) {
							this.handleStreamingResponse(eventStream, res, isOpenAi, modelId);
						} else {
							await this.handleStaticResponse(
								eventStream,
								res,
								isOpenAi,
								modelId,
							);
						}
						return; // success
					} catch (err: unknown) {
						// NOTE(review): failures reported through the stream's "error"
						// event are answered by the response handlers and never reach
						// this catch, so the fallback only covers synchronous throws.
						if (this.errorStatus(err) === 429) {
							sawRateLimit = true;
							log(
								`Provider ${resolvedModel.provider} rate-limited (429), trying next candidate`,
							);
							continue;
						}
						throw err;
					}
				}

				// All candidates exhausted
				if (sawRateLimit) {
					res
						.status(429)
						.json({ error: `All providers rate-limited for model ${modelId}` });
				} else {
					// Every candidate was skipped for lack of an API key.
					res.status(401).json({
						error: `No credentials configured for any provider of model ${modelId}`,
					});
				}
			} catch (err: unknown) {
				const message = this.errorMessage(err);
				log(`Proxy error: ${message}`);
				// A second status/body cannot be sent once the response has started.
				if (res.headersSent) res.end();
				else res.status(500).json({ error: message });
			}
		};

		app.post("/v1/chat/completions", handleChat);
		app.post("/v1beta/models/:modelId\\:streamGenerateContent", handleChat);

		return new Promise<void>((resolve, reject) => {
			const server = app.listen(this.options.port);
			this.server = server;

			const onStartupError = (err: Error) => {
				// Allow a later start() attempt after a failed bind.
				this.server = null;
				reject(err);
			};
			server.once("error", onStartupError);
			server.once("listening", () => {
				server.off("error", onStartupError);
				log(`Proxy Server running on http://localhost:${this.options.port}`);
				resolve();
			});
		});
	}

	/** Stops accepting new connections and forgets the server; no-op if not started. */
	stop(): void {
		if (this.server) {
			this.server.close();
			this.server = null;
		}
	}

	/** Reads `status` (or, failing that, `statusCode`) from a thrown value, if it is an object. */
	private errorStatus(err: unknown): unknown {
		if (typeof err !== "object" || err === null) return undefined;
		const { status, statusCode } = err as {
			status?: unknown;
			statusCode?: unknown;
		};
		return status ?? statusCode;
	}

	/** Produces a printable message for any thrown value. */
	private errorMessage(err: unknown): string {
		if (err instanceof Error) return err.message;
		if (typeof err === "object" && err !== null) {
			const { message } = err as { message?: unknown };
			if (typeof message === "string") return message;
		}
		return String(err);
	}

	/**
	 * Flattens message content to plain text: a string is returned as-is; for
	 * an array of parts, the string `text` fields are joined with newlines and
	 * parts without one (images, inline data, ...) are ignored.
	 */
	private extractText(content: any): string {
		if (typeof content === "string") return content;
		if (!Array.isArray(content)) return "";
		return content
			.filter((part: any) => typeof part?.text === "string")
			.map((part: any) => part.text)
			.join("\n");
	}

	/**
	 * Converts an OpenAI chat-completions request body into a `Context`.
	 * All system messages are merged (newline-joined) into `systemPrompt`;
	 * of the remaining messages, "user" stays "user" and every other role is
	 * mapped to "assistant". String content is wrapped in a single text part.
	 */
	private normalizeOpenAi(body: any): Context {
		const messages: any = Array.isArray(body?.messages) ? body.messages : [];
		const systemTexts = messages
			.filter((m: any) => m?.role === "system")
			.map((m: any) => this.extractText(m.content));
		const history = messages
			.filter((m: any) => m?.role !== "system")
			.map((m: any) => ({
				role: m?.role === "user" ? "user" : "assistant",
				content:
					typeof m?.content === "string"
						? [{ type: "text", text: m.content }]
						: m?.content,
			}));
		return {
			messages: history,
			systemPrompt: systemTexts.length > 0 ? systemTexts.join("\n") : undefined,
		};
	}

	/**
	 * Converts a Google GenAI request body into a `Context`. Only parts that
	 * carry a string `text` are forwarded; "user" stays "user" and every
	 * other role (e.g. "model") is mapped to "assistant". The text parts of
	 * `systemInstruction` are newline-joined into `systemPrompt`.
	 */
	private normalizeGoogle(body: any): Context {
		const contents: any = Array.isArray(body?.contents) ? body.contents : [];
		const history = contents.map((c: any) => ({
			role: c?.role === "user" ? "user" : "assistant",
			content: (Array.isArray(c?.parts) ? c.parts : [])
				.filter((p: any) => typeof p?.text === "string")
				.map((p: any) => ({
					type: "text",
					text: p.text,
				})),
		}));
		const systemText = this.extractText(body?.systemInstruction?.parts);
		return {
			messages: history,
			systemPrompt: systemText === "" ? undefined : systemText,
		};
	}

	/**
	 * Pipes `text_delta` events to the client as they arrive: SSE
	 * `chat.completion.chunk` frames for OpenAI (terminated by a
	 * `finish_reason: "stop"` chunk and `[DONE]`), newline-delimited JSON
	 * objects otherwise. Returns immediately; the response is ended from the
	 * stream's "done" / "error" events.
	 */
	private handleStreamingResponse(
		eventStream: any,
		res: express.Response,
		isOpenAi: boolean,
		modelId: string,
	) {
		res.setHeader(
			"Content-Type",
			isOpenAi ? "text/event-stream" : "application/json",
		);

		// All chunks of one completion share the same id and creation time.
		const completionId = `chatcmpl-${Date.now()}`;
		const created = Math.floor(Date.now() / 1000);
		const writeOpenAiChunk = (
			delta: Record<string, unknown>,
			finishReason: string | null,
		) => {
			const chunk = {
				id: completionId,
				object: "chat.completion.chunk",
				created,
				model: modelId,
				choices: [{ index: 0, delta, finish_reason: finishReason }],
			};
			res.write(`data: ${JSON.stringify(chunk)}\n\n`);
		};

		// Guards against writing after the response was ended.
		let finished = false;

		eventStream.on("data", (ev: any) => {
			if (finished || ev?.type !== "text_delta") return;
			if (isOpenAi) {
				writeOpenAiChunk({ content: ev.delta }, null);
			} else {
				const chunk = {
					candidates: [{ content: { parts: [{ text: ev.delta }] } }],
				};
				res.write(JSON.stringify(chunk) + "\n");
			}
		});

		eventStream.on("done", () => {
			if (finished) return;
			finished = true;
			if (isOpenAi) {
				writeOpenAiChunk({}, "stop");
				res.write("data: [DONE]\n\n");
			}
			res.end();
		});

		eventStream.on("error", (ev: any) => {
			if (finished) return;
			finished = true;
			if (!res.headersSent) {
				res.status(500).json({
					error: ev?.error?.errorMessage ?? "Upstream stream error",
				});
			} else {
				// Output already started; all that is left is closing the stream.
				res.end();
			}
		});
	}

	/**
	 * Buffers all `text_delta` events and sends one JSON response when the
	 * stream reports "done" (OpenAI `chat.completion` or GenAI `candidates`
	 * shape), or a 500 when it reports "error".
	 *
	 * @returns A promise that resolves after the response was sent; it never rejects.
	 */
	private async handleStaticResponse(
		eventStream: any,
		res: express.Response,
		isOpenAi: boolean,
		modelId: string,
	) {
		let fullContent = "";
		eventStream.on("data", (ev: any) => {
			if (ev?.type === "text_delta") fullContent += ev.delta;
		});

		return new Promise<void>((resolve) => {
			// Only the first of "done" / "error" may answer the request.
			let settled = false;

			eventStream.on("done", () => {
				if (settled) return;
				settled = true;
				if (isOpenAi) {
					res.json({
						id: `chatcmpl-${Date.now()}`,
						object: "chat.completion",
						created: Math.floor(Date.now() / 1000),
						model: modelId,
						choices: [
							{
								index: 0,
								message: { role: "assistant", content: fullContent },
								finish_reason: "stop",
							},
						],
					});
				} else {
					res.json({
						candidates: [{ content: { parts: [{ text: fullContent }] } }],
					});
				}
				resolve();
			});
			eventStream.on("error", (ev: any) => {
				if (settled) return;
				settled = true;
				if (!res.headersSent) {
					res.status(500).json({
						error: ev?.error?.errorMessage ?? "Upstream stream error",
					});
				}
				resolve();
			});
		});
	}
}