From 344b599badbd60de01211c85fd43e2333b355c58 Mon Sep 17 00:00:00 2001
From: Ray Andrew
Date: Sun, 15 Feb 2026 01:34:19 -0600
Subject: [PATCH] feat(gateway): add config mounted

---
Review note: this patch mounts ./llm-gateway.yaml (added below) as the
gateway's /etc/llm-gateway/config.yaml. Line structure and indentation were
restored from a whitespace-collapsed copy; confirm the docker-compose.yml
context whitespace matches the target tree before applying.

 docker-compose.yml |   2 +-
 llm-gateway.yaml   | 264 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 265 insertions(+), 1 deletion(-)
 create mode 100644 llm-gateway.yaml

diff --git a/docker-compose.yml b/docker-compose.yml
index bc6b0ad..1147d15 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -45,7 +45,7 @@ services:
       - "0.0.0.0:4000:3000"
     volumes:
       - llm-gateway-data:/data
-      - ./llm-gateway/configs/config.yaml:/etc/llm-gateway/config.yaml:ro
+      - ./llm-gateway.yaml:/etc/llm-gateway/config.yaml:ro
     environment:
       - SESSION_SECRET=${SESSION_SECRET}
       - ADMIN_USERNAME=${ADMIN_USERNAME}
diff --git a/llm-gateway.yaml b/llm-gateway.yaml
new file mode 100644
index 0000000..9bd4c13
--- /dev/null
+++ b/llm-gateway.yaml
@@ -0,0 +1,264 @@
+server:  # listen address, request limits, and admin/session values taken from ${...} placeholders
+  listen: "0.0.0.0:3000"
+  request_timeout: 300s
+  max_request_body_mb: 10
+  session_secret: "${SESSION_SECRET}"
+  default_admin:
+    username: "${ADMIN_USERNAME}"
+    password: "${ADMIN_PASSWORD}"
+
+tokens:  # API tokens, each with its own rate limit (rpm) and daily USD budget
+  - name: "open-webui"
+    key: "${OPENWEBUI_API_KEY}"
+    rate_limit_rpm: 0  # unlimited
+    daily_budget_usd: 5.0
+  - name: "rayandrew"
+    key: "${PERSONAL_API_KEY}"
+    rate_limit_rpm: 0  # unlimited
+    daily_budget_usd: 10.0
+
+pricing_lookup:
+  # url: "https://raw.githubusercontent.com/pydantic/genai-prices/main/prices/data_slim.json" # default
+  refresh_interval: 6h
+
+database:
+  path: "/data/gateway.db"
+  retention_days: 90
+
+cache:
+  enabled: true
+  address: "valkey:6379"
+  ttl: 3600  # NOTE(review): unitless, unlike 300s/6h/120s elsewhere — presumably seconds; confirm
+
+providers:  # provider endpoints; API keys come from ${...} placeholders
+  - name: deepinfra
+    base_url: "https://api.deepinfra.com/v1/openai"
+    api_key: "${DEEPINFRA_API_KEY}"
+    priority: 1
+    timeout: 120s
+  - name: siliconflow
+    base_url: "https://api.siliconflow.com/v1"
+    api_key: "${SILICONFLOW_API_KEY}"
+    priority: 2
+    timeout: 120s
+  - name: openrouter
+    base_url: "https://openrouter.ai/api/v1"
+    api_key: "${OPENROUTER_API_KEY}"
+    priority: 3
+    timeout: 120s
+  - name: groq
+    base_url: "https://api.groq.com/openai/v1"
+    api_key: "${GROQ_API_KEY}"
+    priority: 1  # NOTE(review): same priority as deepinfra and cerebras — confirm tie-break
+    timeout: 120s
+  - name: cerebras
+    base_url: "https://api.cerebras.ai/v1"
+    api_key: "${CEREBRAS_API_KEY}"
+    priority: 1
+    timeout: 120s
+
+models:  # alias -> provider routes; pricing is optional per route (units not shown here — TODO confirm)
+  # ── DeepSeek V3.2 ──
+  - name: "deepseek-v3.2"
+    routes:
+      - provider: deepinfra
+        model: "deepseek-ai/DeepSeek-V3.2"
+        pricing: { input: 0.26, output: 0.38 }
+      - provider: siliconflow
+        model: "deepseek-ai/DeepSeek-V3.2"
+        pricing: { input: 0.27, output: 0.42 }
+      - provider: openrouter
+        model: "deepseek/deepseek-chat-v3-0324"  # NOTE(review): ID says v3-0324, alias says v3.2 — confirm
+        pricing: { input: 0.30, output: 0.88 }
+
+  # ── DeepSeek R1 ──
+  - name: "deepseek-r1"
+    routes:
+      - provider: deepinfra
+        model: "deepseek-ai/DeepSeek-R1"
+        pricing: { input: 0.40, output: 1.60 }
+      - provider: openrouter
+        model: "deepseek/deepseek-r1"
+        pricing: { input: 0.55, output: 2.19 }
+
+  # ── GPT-OSS (OpenAI open-weight MoE) ──
+  - name: "gpt-oss"
+    routes:
+      - provider: deepinfra
+        model: "openai/gpt-oss-120b"
+        pricing: { input: 0.05, output: 0.24 }
+
+  - name: "gpt-oss-20b"
+    routes:
+      - provider: deepinfra
+        model: "openai/gpt-oss-20b"
+        pricing: { input: 0.04, output: 0.16 }
+
+  # ── Nemotron ──
+  - name: "nemotron-super"
+    routes:
+      - provider: deepinfra
+        model: "nvidia/Llama-3.3-Nemotron-Super-49B-v1.5"
+        pricing: { input: 0.10, output: 0.40 }
+
+  - name: "nemotron-nano"
+    routes:
+      - provider: deepinfra
+        model: "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
+        pricing: { input: 0.04, output: 0.16 }
+
+  # ── Devstral ──
+  - name: "devstral"
+    routes:
+      - provider: deepinfra
+        model: "mistralai/Devstral-Small-2505"
+
+  # ── GLM ──
+  - name: "glm-4.6"
+    routes:
+      - provider: deepinfra
+        model: "zai-org/GLM-4.6"
+        pricing: { input: 0.60, output: 1.90 }
+
+  - name: "glm-4.7"
+    routes:
+      - provider: deepinfra
+        model: "zai-org/GLM-4.7"
+        pricing: { input: 0.40, output: 1.75 }
+      - provider: siliconflow
+        model: "THUDM/GLM-4-32B-0414"  # NOTE(review): ID names GLM-4-32B, alias is glm-4.7 — confirm
+
+  - name: "glm-5"
+    routes:
+      - provider: deepinfra
+        model: "zai-org/GLM-5"
+        pricing: { input: 0.80, output: 2.56 }
+
+  # ── Kimi ──
+  - name: "kimi-k2"
+    routes:
+      - provider: deepinfra
+        model: "moonshotai/Kimi-K2-Instruct-0905"
+        pricing: { input: 0.50, output: 2.00 }
+      - provider: siliconflow
+        model: "moonshotai/Kimi-K2-Instruct-0905"
+        pricing: { input: 0.58, output: 2.29 }
+
+  - name: "kimi-k2.5"
+    routes:
+      - provider: deepinfra
+        model: "moonshotai/Kimi-K2.5"
+        pricing: { input: 0.45, output: 2.25 }
+      - provider: openrouter
+        model: "moonshotai/kimi-k2.5"
+
+  # ── Qwen3 Coder ──
+  - name: "qwen3-coder"
+    routes:
+      - provider: siliconflow
+        model: "Qwen/Qwen3-Coder-480B-A35B-Instruct"
+        pricing: { input: 1.14, output: 2.28 }
+
+  - name: "qwen3-coder-30b"
+    routes:
+      - provider: siliconflow
+        model: "Qwen/Qwen3-Coder-30B-A3B-Instruct"
+
+  # ── Llama ──
+  - name: "llama-3.3-70b"
+    routes:
+      - provider: groq
+        model: "llama-3.3-70b-versatile"
+      - provider: cerebras
+        model: "llama-3.3-70b"
+      - provider: deepinfra
+        model: "meta-llama/Llama-3.3-70B-Instruct"
+        pricing: { input: 0.23, output: 0.40 }
+
+  - name: "llama-3.1-8b"
+    routes:
+      - provider: groq
+        model: "llama-3.1-8b-instant"
+      - provider: cerebras
+        model: "llama-3.1-8b"
+      - provider: deepinfra
+        model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+        pricing: { input: 0.03, output: 0.05 }
+
+  # ── Qwen 2.5 ──
+  - name: "qwen-2.5-72b"
+    routes:
+      - provider: groq
+        model: "qwen-2.5-72b"
+      - provider: deepinfra
+        model: "Qwen/Qwen2.5-72B-Instruct"
+        pricing: { input: 0.23, output: 0.40 }
+
+  - name: "qwen-2.5-coder-32b"
+    routes:
+      - provider: groq
+        model: "qwen-2.5-coder-32b"
+      - provider: deepinfra
+        model: "Qwen/Qwen2.5-Coder-32B-Instruct"
+        pricing: { input: 0.07, output: 0.16 }
+
+  # ── Other ──
+  - name: "gemma-2-9b"
+    routes:
+      - provider: groq
+        model: "gemma2-9b-it"
+
+  - name: "deepseek-r1-distill-llama-70b"
+    routes:
+      - provider: groq
+        model: "deepseek-r1-distill-llama-70b"
+      - provider: deepinfra
+        model: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
+        pricing: { input: 0.23, output: 0.69 }
+
+  - name: "deepseek-r1-distill-qwen-32b"
+    routes:
+      - provider: deepinfra
+        model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
+        pricing: { input: 0.07, output: 0.16 }
+
+  # ── OpenRouter-only (premium and ":free" routes) ──
+  - name: "deepseek-v3-free"
+    routes:
+      - provider: openrouter
+        model: "deepseek/deepseek-chat-v3-0324:free"
+
+  - name: "minimax-m2.5"
+    routes:
+      - provider: openrouter
+        model: "minimax/minimax-m2.5"
+
+  - name: "gpt-4.1-mini"
+    routes:
+      - provider: openrouter
+        model: "openai/gpt-4.1-mini"
+
+  - name: "gpt-4.1"
+    routes:
+      - provider: openrouter
+        model: "openai/gpt-4.1"
+
+  - name: "gemini-3-flash-preview"
+    routes:
+      - provider: openrouter
+        model: "google/gemini-3-flash-preview"
+
+  - name: "gemini-2.5-pro"
+    routes:
+      - provider: openrouter
+        model: "google/gemini-2.5-pro-preview"  # NOTE(review): alias omits "-preview" — confirm intended ID
+
+  - name: "claude-sonnet"
+    routes:
+      - provider: openrouter
+        model: "anthropic/claude-sonnet-4"
+
+  - name: "trinity-large-preview"
+    routes:
+      - provider: openrouter
+        model: "arcee-ai/trinity-large-preview"