---
# LiteLLM proxy configuration.
#
# model_list is ordered by cost: free tiers first, then progressively more
# expensive providers. Entries that share a model_name form a single routing
# group in LiteLLM (load-balanced, with fallback between providers).
# All api_key values use LiteLLM's os.environ/ indirection — no secrets here.

model_list:
  # ═══════════════════════════════════════════════
  # TIER 1: Free providers (try first)
  # ═══════════════════════════════════════════════

  # --- Groq (free tier, very fast) ---
  - model_name: llama-3.3-70b
    litellm_params:
      model: groq/llama-3.3-70b-versatile
      api_key: os.environ/GROQ_API_KEY

  # --- Cerebras (free tier, very fast) ---
  - model_name: llama-3.3-70b-cerebras
    litellm_params:
      model: cerebras/llama-3.3-70b
      api_key: os.environ/CEREBRAS_API_KEY

  # --- OpenRouter free models ---
  - model_name: deepseek-v3-free
    litellm_params:
      model: openrouter/deepseek/deepseek-chat-v3-0324:free
      api_key: os.environ/OPENROUTER_API_KEY

  # ═══════════════════════════════════════════════
  # TIER 2: SiliconFlow (cheapest paid, ~3-5x cheaper than OpenRouter)
  # ═══════════════════════════════════════════════

  # DeepSeek V3 — best value daily driver ($0.13 in / $0.28 out per M)
  - model_name: deepseek-v3
    litellm_params:
      model: openai/deepseek-ai/DeepSeek-V3-0324
      api_base: https://api.siliconflow.com/v1
      api_key: os.environ/SILICONFLOW_API_KEY

  # DeepSeek V3.2 via SiliconFlow (cheaper than DeepInfra)
  # NOTE(review): litellm_params here are identical to the deepseek-v3 entry
  # above (DeepSeek-V3-0324), so the "deepseek-v3.2" alias silently serves
  # V3 — TODO: replace with SiliconFlow's actual V3.2 model id and verify.
  - model_name: deepseek-v3.2
    litellm_params:
      model: openai/deepseek-ai/DeepSeek-V3-0324
      api_base: https://api.siliconflow.com/v1
      api_key: os.environ/SILICONFLOW_API_KEY

  # GLM-4.7 via SiliconFlow
  # NOTE(review): alias says GLM-4.7 but the backing model is GLM-4-32B-0414;
  # confirm against SiliconFlow's model catalog and update one or the other.
  - model_name: glm-4.7
    litellm_params:
      model: openai/THUDM/GLM-4-32B-0414
      api_base: https://api.siliconflow.com/v1
      api_key: os.environ/SILICONFLOW_API_KEY

  # Qwen3 Coder 480B MoE via SiliconFlow ($1.14 in / $2.28 out per M)
  - model_name: qwen3-coder
    litellm_params:
      model: openai/Qwen/Qwen3-Coder-480B-A35B-Instruct
      api_base: https://api.siliconflow.com/v1
      api_key: os.environ/SILICONFLOW_API_KEY

  # Qwen3 Coder 30B — cheaper alternative for simpler tasks
  - model_name: qwen3-coder-30b
    litellm_params:
      model: openai/Qwen/Qwen3-Coder-30B-A3B-Instruct
      api_base: https://api.siliconflow.com/v1
      api_key: os.environ/SILICONFLOW_API_KEY

  # ═══════════════════════════════════════════════
  # TIER 3: DeepInfra (good mid-range pricing)
  # ═══════════════════════════════════════════════

  # DeepSeek V3 fallback (if SiliconFlow is down)
  - model_name: deepseek-v3
    litellm_params:
      model: deepinfra/deepseek-ai/DeepSeek-V3-0324
      api_key: os.environ/DEEPINFRA_API_KEY

  - model_name: deepseek-r1
    litellm_params:
      model: deepinfra/deepseek-ai/DeepSeek-R1
      api_key: os.environ/DEEPINFRA_API_KEY

  - model_name: devstral
    litellm_params:
      model: deepinfra/mistralai/Devstral-Small-2505
      api_key: os.environ/DEEPINFRA_API_KEY

  # ═══════════════════════════════════════════════
  # TIER 4: OpenRouter (most expensive, widest selection)
  # ═══════════════════════════════════════════════

  # Kimi K2.5 — DeepInfra is cheapest ($0.45 in / $2.25 out per M)
  - model_name: kimi-k2.5
    litellm_params:
      model: deepinfra/moonshotai/Kimi-K2.5
      api_key: os.environ/DEEPINFRA_API_KEY

  # Kimi K2.5 fallback via OpenRouter
  - model_name: kimi-k2.5
    litellm_params:
      model: openrouter/moonshotai/kimi-k2.5
      api_key: os.environ/OPENROUTER_API_KEY

  - model_name: minimax-m2.5
    litellm_params:
      model: openrouter/minimax/minimax-m2.5
      api_key: os.environ/OPENROUTER_API_KEY

  # NOTE(review): alias "gpt-oss" is backed by gpt-4.1-mini, a closed OpenAI
  # model — either the alias or the model id is wrong (gpt-oss would be
  # openai/gpt-oss-*); confirm intent before relying on this route.
  - model_name: gpt-oss
    litellm_params:
      model: openrouter/openai/gpt-4.1-mini
      api_key: os.environ/OPENROUTER_API_KEY

  - model_name: gemini-3-flash-preview
    litellm_params:
      model: openrouter/google/gemini-3-flash-preview
      api_key: os.environ/OPENROUTER_API_KEY

  - model_name: trinity-large-preview
    litellm_params:
      model: openrouter/arcee-ai/trinity-large-preview
      api_key: os.environ/OPENROUTER_API_KEY

  # --- OpenRouter premium models ---
  - model_name: gemini-2.5-pro
    litellm_params:
      model: openrouter/google/gemini-2.5-pro-preview
      api_key: os.environ/OPENROUTER_API_KEY

  - model_name: claude-sonnet
    litellm_params:
      model: openrouter/anthropic/claude-sonnet-4
      api_key: os.environ/OPENROUTER_API_KEY

  - model_name: gpt-4.1
    litellm_params:
      model: openrouter/openai/gpt-4.1
      api_key: os.environ/OPENROUTER_API_KEY

  # DeepSeek V3 last-resort fallback via OpenRouter
  - model_name: deepseek-v3
    litellm_params:
      model: openrouter/deepseek/deepseek-chat-v3-0324
      api_key: os.environ/OPENROUTER_API_KEY

general_settings:
  # Auth token required on every proxy request (Bearer key).
  master_key: os.environ/LITELLM_MASTER_KEY

litellm_settings:
  # Drop provider-unsupported params instead of erroring.
  drop_params: true
  set_verbose: false
  num_retries: 2
  # Per-request timeout in seconds (generous for long reasoning outputs).
  request_timeout: 600

  # ── Response caching via Valkey (reuses SearXNG's instance) ──
  # Valkey is Redis-protocol compatible, hence type: redis; "valkey" is the
  # container/service hostname.
  cache: true
  cache_params:
    type: redis
    host: valkey
    port: 6379
    ttl: 3600

  # ── Budget limit: $3/day to prevent surprise bills ──
  max_budget: 3.0
  budget_duration: "1d"