# LLM gateway configuration: HTTP server, client API tokens, pricing lookup,
# storage, response cache, upstream providers, and the model routing table.
#
# NOTE(review): the block structure below was reconstructed from a
# whitespace-collapsed copy of this file; confirm the nesting (in particular
# `default_admin` under `server`, and `tokens` at top level) against the
# gateway's config schema.
# Values written as "${NAME}" are placeholders, presumably expanded from the
# environment by the consumer — confirm.

server:
  listen: "0.0.0.0:3000"
  request_timeout: 300s
  max_request_body_mb: 10
  session_secret: "${SESSION_SECRET}"
  default_admin:
    username: "${ADMIN_USERNAME}"
    password: "${ADMIN_PASSWORD}"

# Client API keys accepted by the gateway, each with its own limits.
tokens:
  - name: "open-webui"
    key: "${OPENWEBUI_API_KEY}"
    rate_limit_rpm: 0  # unlimited
    daily_budget_usd: 5.0
  - name: "rayandrew"
    key: "${PERSONAL_API_KEY}"
    rate_limit_rpm: 0  # unlimited
    daily_budget_usd: 10.0

pricing_lookup:
  # url: "https://raw.githubusercontent.com/pydantic/genai-prices/main/prices/data_slim.json"  # default
  refresh_interval: 6h

database:
  path: "/data/gateway.db"
  retention_days: 90

cache:
  enabled: true
  address: "valkey:6379"
  # NOTE(review): unitless, unlike the suffixed durations elsewhere in this
  # file (300s, 6h, 120s); presumably seconds — confirm.
  ttl: 3600

# Upstream API endpoints.
# NOTE(review): deepinfra, groq and cerebras all declare priority 1; confirm
# how the gateway orders providers that share a priority.
providers:
  - name: deepinfra
    base_url: "https://api.deepinfra.com/v1/openai"
    api_key: "${DEEPINFRA_API_KEY}"
    priority: 1
    timeout: 120s
  - name: siliconflow
    base_url: "https://api.siliconflow.com/v1"
    api_key: "${SILICONFLOW_API_KEY}"
    priority: 2
    timeout: 120s
  - name: openrouter
    base_url: "https://openrouter.ai/api/v1"
    api_key: "${OPENROUTER_API_KEY}"
    priority: 3
    timeout: 120s
  - name: groq
    base_url: "https://api.groq.com/openai/v1"
    api_key: "${GROQ_API_KEY}"
    priority: 1
    timeout: 120s
  - name: cerebras
    base_url: "https://api.cerebras.ai/v1"
    api_key: "${CEREBRAS_API_KEY}"
    priority: 1
    timeout: 120s

# Public model aliases. Each route maps an alias to a provider-specific model
# ID; `provider` must match a `name` under `providers` above.
# NOTE(review): pricing units are not stated in this file (presumably USD per
# million tokens) — confirm.
models:
  - name: "deepseek-v3.2"
    routes:
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-V3.2"
        pricing: { input: 0.26, output: 0.38 }
      - provider: siliconflow
        model: "deepseek-ai/DeepSeek-V3.2"
        pricing: { input: 0.27, output: 0.42 }
      # NOTE(review): this upstream ID names the v3-0324 chat model, while the
      # alias and the other two routes say V3.2 — confirm this is intentional.
      - provider: openrouter
        model: "deepseek/deepseek-chat-v3-0324"
        pricing: { input: 0.30, output: 0.88 }

  - name: "llama-3.3-70b"
    routes:
      - provider: groq
        model: "llama-3.3-70b-versatile"
        pricing: { input: 0, output: 0 }
      - provider: deepinfra
        model: "meta-llama/Llama-3.3-70B-Instruct"
        pricing: { input: 0.23, output: 0.40 }

  - name: "llama-3.1-8b"
    routes:
      - provider: groq
        model: "llama-3.1-8b-instant"
        pricing: { input: 0, output: 0 }
      - provider: cerebras
        model: "llama-3.1-8b"
        pricing: { input: 0, output: 0 }
      - provider: deepinfra
        model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
        pricing: { input: 0.03, output: 0.05 }

  - name: "qwen-2.5-72b"
    routes:
      - provider: groq
        model: "qwen-2.5-72b"
        pricing: { input: 0, output: 0 }
      - provider: deepinfra
        model: "Qwen/Qwen2.5-72B-Instruct"
        pricing: { input: 0.23, output: 0.40 }

  - name: "qwen-2.5-coder-32b"
    routes:
      - provider: groq
        model: "qwen-2.5-coder-32b"
        pricing: { input: 0, output: 0 }
      - provider: deepinfra
        model: "Qwen/Qwen2.5-Coder-32B-Instruct"
        pricing: { input: 0.07, output: 0.16 }

  - name: "gemma-2-9b"
    routes:
      - provider: groq
        model: "gemma2-9b-it"
        pricing: { input: 0, output: 0 }

  - name: "deepseek-r1"
    routes:
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-R1"
        pricing: { input: 0.40, output: 1.60 }
      - provider: openrouter
        model: "deepseek/deepseek-r1"
        pricing: { input: 0.55, output: 2.19 }

  - name: "deepseek-r1-distill-llama-70b"
    routes:
      - provider: groq
        model: "deepseek-r1-distill-llama-70b"
        pricing: { input: 0, output: 0 }
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
        pricing: { input: 0.23, output: 0.69 }

  - name: "deepseek-r1-distill-qwen-32b"
    routes:
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
        pricing: { input: 0.07, output: 0.16 }