---
# LLM gateway configuration.
# Secrets are injected via ${ENV_VAR} placeholders — never hard-code them here.

server:
  listen: "0.0.0.0:3000"
  request_timeout: 300s
  max_request_body_mb: 10
  session_secret: "${SESSION_SECRET}"
  default_admin:
    username: "${ADMIN_USERNAME}"
    password: "${ADMIN_PASSWORD}"

# API tokens accepted by the gateway; rate_limit_rpm of 0 disables rate limiting.
tokens:
  - name: "open-webui"
    key: "${OPENWEBUI_API_KEY}"
    rate_limit_rpm: 0  # unlimited
    daily_budget_usd: 5.0
  - name: "rayandrew"
    key: "${PERSONAL_API_KEY}"
    rate_limit_rpm: 0  # unlimited
    daily_budget_usd: 10.0

pricing_lookup:
  # url: "https://raw.githubusercontent.com/pydantic/genai-prices/main/prices/data_slim.json"  # default
  refresh_interval: 6h

database:
  path: "/data/gateway.db"
  retention_days: 90

cache:
  enabled: true
  address: "valkey:6379"
  ttl: 3600

# Upstream providers. Lower priority value = tried first (presumably — TODO confirm
# against the gateway's router implementation).
providers:
  - name: deepinfra
    base_url: "https://api.deepinfra.com/v1/openai"
    api_key: "${DEEPINFRA_API_KEY}"
    priority: 1
    timeout: 120s
  - name: siliconflow
    base_url: "https://api.siliconflow.com/v1"
    api_key: "${SILICONFLOW_API_KEY}"
    priority: 2
    timeout: 120s
  - name: openrouter
    base_url: "https://openrouter.ai/api/v1"
    api_key: "${OPENROUTER_API_KEY}"
    priority: 3
    timeout: 120s
  - name: groq
    base_url: "https://api.groq.com/openai/v1"
    api_key: "${GROQ_API_KEY}"
    priority: 1
    timeout: 120s
  - name: cerebras
    base_url: "https://api.cerebras.ai/v1"
    api_key: "${CEREBRAS_API_KEY}"
    priority: 1
    timeout: 120s

# Model catalog. Each model maps a public name to one or more provider routes.
# Routes without an explicit `pricing` fall back to pricing_lookup.
# pricing units: USD per million tokens (presumably — TODO confirm with consumer).
models:
  # ── DeepSeek V3.2 ──
  - name: "deepseek-v3.2"
    routes:
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-V3.2"
        pricing: { input: 0.26, output: 0.38 }
      - provider: siliconflow
        model: "deepseek-ai/DeepSeek-V3.2"
        pricing: { input: 0.27, output: 0.42 }
      - provider: openrouter
        model: "deepseek/deepseek-chat-v3-0324"
        pricing: { input: 0.30, output: 0.88 }

  # ── DeepSeek R1 ──
  - name: "deepseek-r1"
    routes:
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-R1"
        pricing: { input: 0.40, output: 1.60 }
      - provider: openrouter
        model: "deepseek/deepseek-r1"
        pricing: { input: 0.55, output: 2.19 }

  # ── GPT-OSS (OpenAI open-weight MoE) ──
  - name: "gpt-oss"
    routes:
      - provider: deepinfra
        model: "openai/gpt-oss-120b"
        pricing: { input: 0.05, output: 0.24 }
  - name: "gpt-oss-20b"
    routes:
      - provider: deepinfra
        model: "openai/gpt-oss-20b"
        pricing: { input: 0.04, output: 0.16 }

  # ── Nemotron ──
  - name: "nemotron-super"
    routes:
      - provider: deepinfra
        model: "nvidia/Llama-3.3-Nemotron-Super-49B-v1.5"
        pricing: { input: 0.10, output: 0.40 }
  - name: "nemotron-nano"
    routes:
      - provider: deepinfra
        model: "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
        pricing: { input: 0.04, output: 0.16 }

  # ── Devstral ──
  - name: "devstral"
    routes:
      - provider: deepinfra
        model: "mistralai/Devstral-Small-2505"

  # ── GLM ──
  - name: "glm-4.6"
    routes:
      - provider: deepinfra
        model: "zai-org/GLM-4.6"
        pricing: { input: 0.60, output: 1.90 }
  - name: "glm-4.7"
    routes:
      - provider: deepinfra
        model: "zai-org/GLM-4.7"
        pricing: { input: 0.40, output: 1.75 }
      - provider: siliconflow
        model: "THUDM/GLM-4-32B-0414"
  - name: "glm-5"
    routes:
      - provider: deepinfra
        model: "zai-org/GLM-5"
        pricing: { input: 0.80, output: 2.56 }

  # ── Kimi ──
  - name: "kimi-k2"
    routes:
      - provider: deepinfra
        model: "moonshotai/Kimi-K2-Instruct-0905"
        pricing: { input: 0.50, output: 2.00 }
      - provider: siliconflow
        model: "moonshotai/Kimi-K2-Instruct-0905"
        pricing: { input: 0.58, output: 2.29 }
  - name: "kimi-k2.5"
    routes:
      - provider: deepinfra
        model: "moonshotai/Kimi-K2.5"
        pricing: { input: 0.45, output: 2.25 }
      - provider: openrouter
        model: "moonshotai/kimi-k2.5"

  # ── Qwen3 Coder ──
  - name: "qwen3-coder"
    routes:
      - provider: siliconflow
        model: "Qwen/Qwen3-Coder-480B-A35B-Instruct"
        pricing: { input: 1.14, output: 2.28 }
  - name: "qwen3-coder-30b"
    routes:
      - provider: siliconflow
        model: "Qwen/Qwen3-Coder-30B-A3B-Instruct"

  # ── Llama ──
  - name: "llama-3.3-70b"
    routes:
      - provider: groq
        model: "llama-3.3-70b-versatile"
      - provider: cerebras
        model: "llama-3.3-70b"
      - provider: deepinfra
        model: "meta-llama/Llama-3.3-70B-Instruct"
        pricing: { input: 0.23, output: 0.40 }
  - name: "llama-3.1-8b"
    routes:
      - provider: groq
        model: "llama-3.1-8b-instant"
      - provider: cerebras
        model: "llama-3.1-8b"
      - provider: deepinfra
        model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
        pricing: { input: 0.03, output: 0.05 }

  # ── Qwen 2.5 ──
  - name: "qwen-2.5-72b"
    routes:
      - provider: groq
        model: "qwen-2.5-72b"
      - provider: deepinfra
        model: "Qwen/Qwen2.5-72B-Instruct"
        pricing: { input: 0.23, output: 0.40 }
  - name: "qwen-2.5-coder-32b"
    routes:
      - provider: groq
        model: "qwen-2.5-coder-32b"
      - provider: deepinfra
        model: "Qwen/Qwen2.5-Coder-32B-Instruct"
        pricing: { input: 0.07, output: 0.16 }

  # ── Other ──
  - name: "gemma-2-9b"
    routes:
      - provider: groq
        model: "gemma2-9b-it"
  - name: "deepseek-r1-distill-llama-70b"
    routes:
      - provider: groq
        model: "deepseek-r1-distill-llama-70b"
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
        pricing: { input: 0.23, output: 0.69 }
  - name: "deepseek-r1-distill-qwen-32b"
    routes:
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
        pricing: { input: 0.07, output: 0.16 }

  # ── OpenRouter premium ──
  - name: "deepseek-v3-free"
    routes:
      - provider: openrouter
        model: "deepseek/deepseek-chat-v3-0324:free"
  - name: "minimax-m2.5"
    routes:
      - provider: openrouter
        model: "minimax/minimax-m2.5"
  - name: "gpt-4.1-mini"
    routes:
      - provider: openrouter
        model: "openai/gpt-4.1-mini"
  - name: "gpt-4.1"
    routes:
      - provider: openrouter
        model: "openai/gpt-4.1"
  - name: "gemini-3-flash-preview"
    routes:
      - provider: openrouter
        model: "google/gemini-3-flash-preview"
  - name: "gemini-2.5-pro"
    routes:
      - provider: openrouter
        model: "google/gemini-2.5-pro-preview"
  - name: "claude-sonnet"
    routes:
      - provider: openrouter
        model: "anthropic/claude-sonnet-4"
  - name: "trinity-large-preview"
    routes:
      - provider: openrouter
        model: "arcee-ai/trinity-large-preview"