---
model_list:
  # ═══════════════════════════════════════════════
  # TIER 1: Free providers (try first)
  # ═══════════════════════════════════════════════

  # --- Groq (free tier, very fast) ---
  - model_name: llama-3.3-70b
    litellm_params:
      model: groq/llama-3.3-70b-versatile
      api_key: os.environ/GROQ_API_KEY

  # --- Cerebras (free tier, very fast) ---
  - model_name: llama-3.3-70b-cerebras
    litellm_params:
      model: cerebras/llama-3.3-70b
      api_key: os.environ/CEREBRAS_API_KEY

  # --- OpenRouter free models ---
  - model_name: deepseek-v3-free
    litellm_params:
      model: openrouter/deepseek/deepseek-chat-v3-0324:free
      api_key: os.environ/OPENROUTER_API_KEY

  # ═══════════════════════════════════════════════
  # TIER 2: DeepSeek V3.2 (cheapest first)
  # ═══════════════════════════════════════════════

  # DeepSeek V3.2 via DeepInfra ($0.26 in / $0.38 out per M)
  - model_name: deepseek-v3.2
    litellm_params:
      model: deepinfra/deepseek-ai/DeepSeek-V3.2
      api_key: os.environ/DEEPINFRA_API_KEY

  # DeepSeek V3.2 fallback via SiliconFlow ($0.27 in / $0.42 out per M)
  # Same model_name as above — LiteLLM treats repeated names as one
  # deployment group and load-balances / fails over between them.
  - model_name: deepseek-v3.2
    litellm_params:
      model: openai/deepseek-ai/DeepSeek-V3.2
      api_base: https://api.siliconflow.com/v1
      api_key: os.environ/SILICONFLOW_API_KEY

  # ═══════════════════════════════════════════════
  # TIER 3: Ultra-cheap DeepInfra models
  # ═══════════════════════════════════════════════

  # GPT-OSS-120B — OpenAI open-weight MoE ($0.05 in / $0.24 out per M)
  - model_name: gpt-oss
    litellm_params:
      model: deepinfra/openai/gpt-oss-120b
      api_key: os.environ/DEEPINFRA_API_KEY

  # GPT-OSS-20B — lower latency variant ($0.04 in / $0.16 out per M)
  - model_name: gpt-oss-20b
    litellm_params:
      model: deepinfra/openai/gpt-oss-20b
      api_key: os.environ/DEEPINFRA_API_KEY

  # Nemotron Super 49B — near-flagship quality ($0.10 in / $0.40 out per M)
  - model_name: nemotron-super
    litellm_params:
      model: deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5
      api_key: os.environ/DEEPINFRA_API_KEY

  # Nemotron Nano 9B — dirt cheap for simple tasks ($0.04 in / $0.16 out per M)
  - model_name: nemotron-nano
    litellm_params:
      model: deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2
      api_key: os.environ/DEEPINFRA_API_KEY

  # ═══════════════════════════════════════════════
  # TIER 4: Other DeepInfra models
  # ═══════════════════════════════════════════════

  - model_name: deepseek-r1
    litellm_params:
      model: deepinfra/deepseek-ai/DeepSeek-R1
      api_key: os.environ/DEEPINFRA_API_KEY

  - model_name: devstral
    litellm_params:
      model: deepinfra/mistralai/Devstral-Small-2505
      api_key: os.environ/DEEPINFRA_API_KEY

  # ═══════════════════════════════════════════════
  # TIER 5: GLM models (cheapest first)
  # ═══════════════════════════════════════════════

  # GLM-4.6 via DeepInfra ($0.60 in / $1.90 out per M)
  - model_name: glm-4.6
    litellm_params:
      model: deepinfra/zai-org/GLM-4.6
      api_key: os.environ/DEEPINFRA_API_KEY

  # GLM-4.7 via DeepInfra ($0.40 in / $1.75 out per M)
  - model_name: glm-4.7
    litellm_params:
      model: deepinfra/zai-org/GLM-4.7
      api_key: os.environ/DEEPINFRA_API_KEY

  # GLM-4.7 fallback via SiliconFlow
  # NOTE(review): this deployment maps the glm-4.7 group to GLM-4-32B-0414,
  # a different and much smaller model — confirm this quality drop on
  # failover is intentional.
  - model_name: glm-4.7
    litellm_params:
      model: openai/THUDM/GLM-4-32B-0414
      api_base: https://api.siliconflow.com/v1
      api_key: os.environ/SILICONFLOW_API_KEY

  # GLM-5 via DeepInfra ($0.80 in / $2.56 out per M)
  - model_name: glm-5
    litellm_params:
      model: deepinfra/zai-org/GLM-5
      api_key: os.environ/DEEPINFRA_API_KEY

  # ═══════════════════════════════════════════════
  # TIER 6: Kimi K2 (cheapest first)
  # ═══════════════════════════════════════════════

  # Kimi K2 via DeepInfra ($0.50 in / $2.00 out per M)
  - model_name: kimi-k2
    litellm_params:
      model: deepinfra/moonshotai/Kimi-K2-Instruct-0905
      api_key: os.environ/DEEPINFRA_API_KEY

  # Kimi K2 fallback via SiliconFlow ($0.58 in / $2.29 out per M)
  - model_name: kimi-k2
    litellm_params:
      model: openai/moonshotai/Kimi-K2-Instruct-0905
      api_base: https://api.siliconflow.com/v1
      api_key: os.environ/SILICONFLOW_API_KEY

  # ═══════════════════════════════════════════════
  # TIER 7: SiliconFlow (Qwen)
  # ═══════════════════════════════════════════════

  # Qwen3 Coder 480B MoE via SiliconFlow ($1.14 in / $2.28 out per M)
  - model_name: qwen3-coder
    litellm_params:
      model: openai/Qwen/Qwen3-Coder-480B-A35B-Instruct
      api_base: https://api.siliconflow.com/v1
      api_key: os.environ/SILICONFLOW_API_KEY

  # Qwen3 Coder 30B — cheaper alternative for simpler tasks
  - model_name: qwen3-coder-30b
    litellm_params:
      model: openai/Qwen/Qwen3-Coder-30B-A3B-Instruct
      api_base: https://api.siliconflow.com/v1
      api_key: os.environ/SILICONFLOW_API_KEY

  # ═══════════════════════════════════════════════
  # TIER 8: OpenRouter (most expensive, widest selection)
  # ═══════════════════════════════════════════════

  # Kimi K2.5 — DeepInfra is cheapest ($0.45 in / $2.25 out per M)
  - model_name: kimi-k2.5
    litellm_params:
      model: deepinfra/moonshotai/Kimi-K2.5
      api_key: os.environ/DEEPINFRA_API_KEY

  # Kimi K2.5 fallback via OpenRouter
  - model_name: kimi-k2.5
    litellm_params:
      model: openrouter/moonshotai/kimi-k2.5
      api_key: os.environ/OPENROUTER_API_KEY

  - model_name: minimax-m2.5
    litellm_params:
      model: openrouter/minimax/minimax-m2.5
      api_key: os.environ/OPENROUTER_API_KEY

  - model_name: gpt-4.1-mini
    litellm_params:
      model: openrouter/openai/gpt-4.1-mini
      api_key: os.environ/OPENROUTER_API_KEY

  - model_name: gemini-3-flash-preview
    litellm_params:
      model: openrouter/google/gemini-3-flash-preview
      api_key: os.environ/OPENROUTER_API_KEY

  - model_name: trinity-large-preview
    litellm_params:
      model: openrouter/arcee-ai/trinity-large-preview
      api_key: os.environ/OPENROUTER_API_KEY

  # --- OpenRouter premium models ---
  - model_name: gemini-2.5-pro
    litellm_params:
      model: openrouter/google/gemini-2.5-pro-preview
      api_key: os.environ/OPENROUTER_API_KEY

  - model_name: claude-sonnet
    litellm_params:
      model: openrouter/anthropic/claude-sonnet-4
      api_key: os.environ/OPENROUTER_API_KEY

  - model_name: gpt-4.1
    litellm_params:
      model: openrouter/openai/gpt-4.1
      api_key: os.environ/OPENROUTER_API_KEY

  # DeepSeek V3.2 last-resort fallback via OpenRouter
  - model_name: deepseek-v3.2
    litellm_params:
      model: openrouter/deepseek/deepseek-chat-v3-0324
      api_key: os.environ/OPENROUTER_API_KEY
|
|
|
|
general_settings:
  # Bearer token clients must present to the proxy.
  master_key: os.environ/LITELLM_MASTER_KEY

litellm_settings:
  drop_params: true
  set_verbose: false
  num_retries: 2
  request_timeout: 600

  # ── Model group fallbacks (when model misbehaves mid-stream) ──
  # Moved here from general_settings: LiteLLM reads `fallbacks` from
  # litellm_settings / router settings; under general_settings it is
  # silently ignored.
  fallbacks:
    - deepseek-v3.2: [gpt-oss, kimi-k2]
    - gpt-oss: [deepseek-v3.2, nemotron-super]
    - kimi-k2: [deepseek-v3.2, gpt-oss]
    - kimi-k2.5: [deepseek-v3.2, gpt-oss]
    - glm-4.7: [deepseek-v3.2, gpt-oss]
    - glm-5: [deepseek-v3.2, gpt-oss]

  # ── Response caching via Valkey (reuses SearXNG's instance) ──
  cache: true
  cache_params:
    type: redis
    host: valkey
    port: 6379
    ttl: 3600

  # ── Budget limit: $3/day to prevent surprise bills ──
  # max_budget: 3.0
  # budget_duration: "1d"
|