---
# Gateway configuration (presumably for the "janus" LLM proxy — database path
# below suggests the name; confirm against the consuming application).
# Structure: server/auth settings, token-based client access, pricing lookup,
# storage, cache, upstream providers, and model routing tables ordered by
# cost tier. Secrets are injected via ${ENV_VAR} references — keep them out
# of version control.

server:
  listen: "0.0.0.0:3000"
  request_timeout: 300s
  max_request_body_mb: 10
  session_secret: "${SESSION_SECRET}"
  default_admin:
    username: "${ADMIN_USERNAME}"
    password: "${ADMIN_PASSWORD}"

# Client API tokens. rate_limit_rpm/daily_budget_usd of 0 = no limit.
tokens:
  - name: "open-webui"
    key: "${OPENWEBUI_API_KEY}"
    rate_limit_rpm: 0  # unlimited
    daily_budget_usd: 0
  - name: "opencode"
    key: "${OPENCODE_API_KEY}"
    rate_limit_rpm: 0  # unlimited
    daily_budget_usd: 0

pricing_lookup:
  # url: "https://raw.githubusercontent.com/pydantic/genai-prices/main/prices/data_slim.json"  # default
  refresh_interval: 6h

database:
  path: "/data/janus.db"
  retention_days: 90

debug:
  enabled: true
  retention_days: 90
  # data_dir: "/data"   # defaults to directory of database.path
  # max_body_bytes: 0   # 0 = unlimited (save full bodies)

cache:
  enabled: true
  address: "valkey:6379"
  ttl: 3600

# Upstream providers. Lower priority value = tried first (assumed — TODO
# confirm ordering semantics against the router implementation).
providers:
  - name: deepinfra
    base_url: "https://api.deepinfra.com/v1/openai"
    api_key: "${DEEPINFRA_API_KEY}"
    priority: 1
    timeout: 120s
  - name: siliconflow
    base_url: "https://api.siliconflow.com/v1"
    api_key: "${SILICONFLOW_API_KEY}"
    priority: 2
    timeout: 120s
  - name: openrouter
    base_url: "https://openrouter.ai/api/v1"
    api_key: "${OPENROUTER_API_KEY}"
    priority: 3
    timeout: 120s
  - name: groq
    base_url: "https://api.groq.com/openai/v1"
    api_key: "${GROQ_API_KEY}"
    priority: 1
    timeout: 120s
  - name: cerebras
    base_url: "https://api.cerebras.ai/v1"
    api_key: "${CEREBRAS_API_KEY}"
    priority: 1
    timeout: 120s
  - name: perplexity
    base_url: "https://api.perplexity.ai"
    api_key: "${PERPLEXITY_API_KEY}"
    priority: 1
    timeout: 120s
  - name: ollama-cloud
    base_url: "https://ollama.com/v1"
    api_key: "${OLLAMA_CLOUD_API_KEY}"
    priority: 1
    timeout: 120s

# Model routing tables. Each exposed model name maps to one or more upstream
# routes; pricing is { input, output } in USD per million tokens (assumed from
# magnitudes — TODO confirm unit against the pricing_lookup data).
models:
  # ═══ TIER 1: Free (OpenRouter free models, $0) ═══
  # NOTE: Commented out — free models are heavily rate-limited upstream.
  # Uncomment if you want best-effort free access.
  # - name: "llama-3.3-70b-free"
  #   routes:
  #     - provider: openrouter
  #       model: "meta-llama/llama-3.3-70b-instruct:free"
  # - name: "deepseek-r1-free"
  #   routes:
  #     - provider: openrouter
  #       model: "deepseek/deepseek-r1-0528:free"
  # - name: "gpt-oss-free"
  #   routes:
  #     - provider: openrouter
  #       model: "openai/gpt-oss-120b:free"
  # - name: "gpt-oss-20b-free"
  #   routes:
  #     - provider: openrouter
  #       model: "openai/gpt-oss-20b:free"
  # - name: "qwen3-coder-free"
  #   routes:
  #     - provider: openrouter
  #       model: "qwen/qwen3-coder:free"
  # - name: "qwen3-235b-free"
  #   routes:
  #     - provider: openrouter
  #       model: "qwen/qwen3-235b-a22b-thinking-2507"
  # - name: "glm-4.5-air-free"
  #   routes:
  #     - provider: openrouter
  #       model: "z-ai/glm-4.5-air:free"
  # - name: "nemotron-nano-free"
  #   routes:
  #     - provider: openrouter
  #       model: "nvidia/nemotron-nano-9b-v2:free"
  # - name: "trinity-large-free"
  #   routes:
  #     - provider: openrouter
  #       model: "arcee-ai/trinity-large-preview:free"
  # - name: "mistral-small-free"
  #   routes:
  #     - provider: openrouter
  #       model: "mistralai/mistral-small-3.1-24b-instruct:free"
  # - name: "gemma-3-27b-free"
  #   routes:
  #     - provider: openrouter
  #       model: "google/gemma-3-27b-it:free"
  # - name: "step-3.5-flash-free"
  #   routes:
  #     - provider: openrouter
  #       model: "stepfun/step-3.5-flash:free"

  # ═══ TIER 2: Low cost (Groq, Cerebras — free tier with rate limits) ═══
  - name: "llama-3.1-8b"
    routes:
      - provider: groq
        model: "llama-3.1-8b-instant"
        pricing: { input: 0.05, output: 0.08 }
      - provider: cerebras
        model: "llama3.1-8b"
        pricing: { input: 0.10, output: 0.10 }
      - provider: deepinfra
        model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
        pricing: { input: 0.03, output: 0.05 }
  - name: "llama-3.3-70b"
    routes:
      - provider: deepinfra
        model: "meta-llama/Llama-3.3-70B-Instruct-Turbo"
        pricing: { input: 0.23, output: 0.40 }
      - provider: groq
        model: "llama-3.3-70b-versatile"
        pricing: { input: 0.59, output: 0.79 }
      - provider: cerebras
        model: "llama-3.3-70b"
        pricing: { input: 0.85, output: 1.20 }
  - name: "gpt-oss"
    routes:
      - provider: ollama-cloud
        model: "gpt-oss:120b-cloud"
      - provider: groq
        model: "openai/gpt-oss-120b"
        pricing: { input: 0.15, output: 0.60 }
      - provider: cerebras
        model: "gpt-oss-120b"
        pricing: { input: 0.35, output: 0.75 }
      - provider: deepinfra
        model: "openai/gpt-oss-120b"
        pricing: { input: 0.05, output: 0.24 }
  - name: "gpt-oss-20b"
    routes:
      - provider: ollama-cloud
        model: "gpt-oss:20b-cloud"
      - provider: groq
        model: "openai/gpt-oss-20b"
        pricing: { input: 0.075, output: 0.30 }
      - provider: deepinfra
        model: "openai/gpt-oss-20b"
        pricing: { input: 0.04, output: 0.16 }
  - name: "llama-4-scout"
    routes:
      - provider: groq
        model: "meta-llama/llama-4-scout-17b-16e-instruct"
        pricing: { input: 0.11, output: 0.34 }
  - name: "llama-4-maverick"
    routes:
      - provider: groq
        model: "meta-llama/llama-4-maverick-17b-128e-instruct"
        pricing: { input: 0.20, output: 0.60 }
  - name: "qwen3-32b"
    routes:
      - provider: groq
        model: "qwen/qwen3-32b"
        pricing: { input: 0.29, output: 0.59 }
      - provider: cerebras
        model: "qwen-3-32b"

  # ═══ TIER 3: DeepSeek V3.2 (cheapest flagship) ═══
  - name: "deepseek-v3.2"
    routes:
      - provider: ollama-cloud
        model: "deepseek-v3.2:cloud"
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-V3.2"
        pricing: { input: 0.26, output: 0.38 }
      - provider: siliconflow
        model: "deepseek-ai/DeepSeek-V3.2"
        pricing: { input: 0.27, output: 0.42 }
      - provider: openrouter
        model: "deepseek/deepseek-chat-v3-0324"
        pricing: { input: 0.30, output: 0.88 }

  # ═══ TIER 4: Ultra-cheap DeepInfra ═══
  - name: "nemotron-super"
    routes:
      - provider: deepinfra
        model: "nvidia/Llama-3.3-Nemotron-Super-49B-v1.5"
        pricing: { input: 0.10, output: 0.40 }
  - name: "nemotron-nano"
    routes:
      - provider: deepinfra
        model: "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
        pricing: { input: 0.04, output: 0.16 }

  # ═══ TIER 5: DeepSeek R1 & reasoning ═══
  - name: "deepseek-r1"
    routes:
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-R1-0528"
      - provider: openrouter
        model: "deepseek/deepseek-r1"
  - name: "deepseek-r1-distill-llama-70b"
    routes:
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
  - name: "devstral-small"
    routes:
      - provider: ollama-cloud
        model: "devstral-small-2:24b-cloud"
      - provider: openrouter
        model: "mistralai/devstral-small"
  - name: "devstral-medium"
    routes:
      - provider: ollama-cloud
        model: "devstral-2:123b-cloud"
      - provider: openrouter
        model: "mistralai/devstral-medium"

  # ═══ TIER 6: GLM ═══
  - name: "glm-4.7-flash"
    routes:
      - provider: deepinfra
        model: "zai-org/GLM-4.7-Flash"
        pricing: { input: 0.06, output: 0.40 }
      - provider: openrouter
        model: "z-ai/glm-4.7-flash"
        pricing: { input: 0.06, output: 0.40 }
  - name: "glm-4.6"
    routes:
      - provider: ollama-cloud
        model: "glm-4.6:cloud"
      - provider: deepinfra
        model: "zai-org/GLM-4.6"
        pricing: { input: 0.60, output: 1.90 }
  - name: "glm-4.7"
    routes:
      - provider: ollama-cloud
        model: "glm-4.7:cloud"
      - provider: deepinfra
        model: "zai-org/GLM-4.7"
        pricing: { input: 0.40, output: 1.75 }
      - provider: cerebras
        model: "zai-glm-4.7"
        pricing: { input: 2.25, output: 2.75 }
      - provider: siliconflow
        model: "THUDM/GLM-4-32B-0414"
  - name: "glm-5"
    routes:
      - provider: ollama-cloud
        model: "glm-5:cloud"
      - provider: deepinfra
        model: "zai-org/GLM-5"
        pricing: { input: 0.80, output: 2.56 }

  # ═══ TIER 7: Kimi ═══
  - name: "kimi-k2"
    routes:
      - provider: groq
        model: "moonshotai/kimi-k2-instruct-0905"
        pricing: { input: 1.00, output: 3.00 }
      - provider: deepinfra
        model: "moonshotai/Kimi-K2-Instruct-0905"
        pricing: { input: 0.50, output: 2.00 }
      - provider: siliconflow
        model: "moonshotai/Kimi-K2-Instruct-0905"
        pricing: { input: 0.58, output: 2.29 }
  - name: "kimi-k2.5"
    routes:
      - provider: ollama-cloud
        model: "kimi-k2.5:cloud"
      - provider: deepinfra
        model: "moonshotai/Kimi-K2.5"
        pricing: { input: 0.45, output: 2.25 }
      - provider: openrouter
        model: "moonshotai/kimi-k2.5"

  # ═══ TIER 8: Qwen ═══
  - name: "qwen3.5"
    routes:
      - provider: ollama-cloud
        model: "qwen3.5:cloud"
      # - provider: openrouter
      #   model: "qwen/qwen3.5-397b-a17b"
      #   pricing: { input: 0.60, output: 3.60 }
  - name: "qwen3-coder"
    routes:
      - provider: ollama-cloud
        model: "qwen3-coder:480b-cloud"
      - provider: deepinfra
        model: "Qwen/Qwen3-Coder-480B-A35B-Instruct"
        pricing: { input: 0.22, output: 1.00 }
      - provider: openrouter
        model: "qwen/qwen3-coder"
        pricing: { input: 0.22, output: 1.00 }
      - provider: cerebras
        model: "qwen3-coder-480b"
        pricing: { input: 2.00, output: 2.00 }
      - provider: siliconflow
        model: "Qwen/Qwen3-Coder-480B-A35B-Instruct"
        pricing: { input: 1.14, output: 2.28 }
  - name: "qwen3-coder-30b"
    routes:
      - provider: siliconflow
        model: "Qwen/Qwen3-Coder-30B-A3B-Instruct"

  # ═══ TIER 9: OpenRouter premium (paid) ═══
  - name: "minimax-m2.5"
    routes:
      - provider: ollama-cloud
        model: "minimax-m2.5:cloud"
      - provider: openrouter
        model: "minimax/minimax-m2.5"
  - name: "gpt-4.1-mini"
    routes:
      - provider: openrouter
        model: "openai/gpt-4.1-mini"
  - name: "gpt-4.1"
    routes:
      - provider: openrouter
        model: "openai/gpt-4.1"
  - name: "gemini-3-flash-preview"
    routes:
      - provider: ollama-cloud
        model: "gemini-3-flash-preview:cloud"
      - provider: openrouter
        model: "google/gemini-3-flash-preview"
  - name: "gemini-2.5-pro"
    routes:
      - provider: openrouter
        model: "google/gemini-2.5-pro-preview"

  # ═══ TIER 10: Vision / Multimodal ═══
  - name: "gemma-3-4b"
    routes:
      - provider: openrouter
        model: "google/gemma-3-4b-it"
        pricing: { input: 0.017, output: 0.068 }
      - provider: deepinfra
        model: "google/gemma-3-4b-it"
        pricing: { input: 0.04, output: 0.08 }
  - name: "gemma-3-12b"
    routes:
      - provider: openrouter
        model: "google/gemma-3-12b-it"
        pricing: { input: 0.03, output: 0.10 }
      - provider: deepinfra
        model: "google/gemma-3-12b-it"
        pricing: { input: 0.04, output: 0.13 }
  - name: "gemma-3-27b"
    routes:
      - provider: openrouter
        model: "google/gemma-3-27b-it"
        pricing: { input: 0.04, output: 0.15 }
      - provider: deepinfra
        model: "google/gemma-3-27b-it"
        pricing: { input: 0.08, output: 0.16 }
  - name: "qwen3-vl-8b"
    routes:
      - provider: openrouter
        model: "qwen/qwen3-vl-8b-instruct"
        pricing: { input: 0.08, output: 0.50 }
      - provider: deepinfra
        model: "Qwen/Qwen3-VL-8B-Instruct"
        pricing: { input: 0.18, output: 0.69 }
  - name: "qwen3-vl-32b"
    routes:
      - provider: openrouter
        model: "qwen/qwen3-vl-32b-instruct"
        pricing: { input: 0.104, output: 0.416 }
  - name: "qwen2.5-vl-32b"
    routes:
      - provider: openrouter
        model: "qwen/qwen2.5-vl-32b-instruct"
        pricing: { input: 0.05, output: 0.22 }
      - provider: deepinfra
        model: "Qwen/Qwen2.5-VL-32B-Instruct"
        pricing: { input: 0.20, output: 0.60 }
  - name: "claude-sonnet"
    routes:
      - provider: openrouter
        model: "anthropic/claude-sonnet-4"

  # ═══ TIER 11: Perplexity (online search models) ═══
  - name: "sonar"
    routes:
      - provider: perplexity
        model: "sonar"
        pricing: { input: 1.00, output: 1.00 }
  - name: "sonar-pro"
    routes:
      - provider: perplexity
        model: "sonar-pro"
        pricing: { input: 3.00, output: 15.00 }
  - name: "sonar-reasoning"
    routes:
      - provider: perplexity
        model: "sonar-reasoning"
        pricing: { input: 1.00, output: 5.00 }

  # ═══ TIER 12: Ollama Cloud ═══
  # Requires Ollama Cloud Pro ($20/mo) or Max ($100/mo) subscription.
  # Get API key from: https://ollama.com → Settings → Keys