feat: restructure model list with tiered providers and add caching config
This commit is contained in:
parent
d575162655
commit
55cc700794
1 changed files with 121 additions and 62 deletions
|
|
@ -1,18 +1,104 @@
|
|||
model_list:
|
||||
# --- OpenRouter models ---
|
||||
# ═══════════════════════════════════════════════
|
||||
# TIER 1: Free providers (try first)
|
||||
# ═══════════════════════════════════════════════
|
||||
|
||||
# --- Groq (free tier, very fast) ---
|
||||
- model_name: llama-3.3-70b
|
||||
litellm_params:
|
||||
model: groq/llama-3.3-70b-versatile
|
||||
api_key: os.environ/GROQ_API_KEY
|
||||
|
||||
# --- Cerebras (free tier, very fast) ---
|
||||
- model_name: llama-3.3-70b-cerebras
|
||||
litellm_params:
|
||||
model: cerebras/llama-3.3-70b
|
||||
api_key: os.environ/CEREBRAS_API_KEY
|
||||
|
||||
# --- OpenRouter free models ---
|
||||
- model_name: deepseek-v3-free
|
||||
litellm_params:
|
||||
model: openrouter/deepseek/deepseek-chat-v3-0324:free
|
||||
api_key: os.environ/OPENROUTER_API_KEY
|
||||
|
||||
# ═══════════════════════════════════════════════
|
||||
# TIER 2: SiliconFlow (cheapest paid, ~3-5x cheaper than OpenRouter)
|
||||
# ═══════════════════════════════════════════════
|
||||
|
||||
# DeepSeek V3 — best value daily driver ($0.13 in / $0.28 out per M)
|
||||
- model_name: deepseek-v3
|
||||
litellm_params:
|
||||
model: openai/deepseek-ai/DeepSeek-V3-0324
|
||||
api_base: https://api.siliconflow.com/v1
|
||||
api_key: os.environ/SILICONFLOW_API_KEY
|
||||
|
||||
# DeepSeek V3.2 via SiliconFlow (cheaper than DeepInfra)
# NOTE(review): the model id below is DeepSeek-V3-0324 — identical to the
# `deepseek-v3` entry above. Confirm whether this alias should point at an
# actual V3.2 model id (e.g. deepseek-ai/DeepSeek-V3.2-Exp on SiliconFlow).
- model_name: deepseek-v3.2
  litellm_params:
    model: openai/deepseek-ai/DeepSeek-V3-0324
    api_base: https://api.siliconflow.com/v1
    api_key: os.environ/SILICONFLOW_API_KEY
|
||||
|
||||
# GLM-4.7 via SiliconFlow
# NOTE(review): alias says GLM-4.7 but the model id is GLM-4-32B-0414 —
# confirm the intended SiliconFlow model id for this alias.
- model_name: glm-4.7
  litellm_params:
    model: openai/THUDM/GLM-4-32B-0414
    api_base: https://api.siliconflow.com/v1
    api_key: os.environ/SILICONFLOW_API_KEY
|
||||
|
||||
# Qwen3 Coder 480B MoE via SiliconFlow ($1.14 in / $2.28 out per M)
|
||||
- model_name: qwen3-coder
|
||||
litellm_params:
|
||||
model: openai/Qwen/Qwen3-Coder-480B-A35B-Instruct
|
||||
api_base: https://api.siliconflow.com/v1
|
||||
api_key: os.environ/SILICONFLOW_API_KEY
|
||||
|
||||
# Qwen3 Coder 30B — cheaper alternative for simpler tasks
|
||||
- model_name: qwen3-coder-30b
|
||||
litellm_params:
|
||||
model: openai/Qwen/Qwen3-Coder-30B-A3B-Instruct
|
||||
api_base: https://api.siliconflow.com/v1
|
||||
api_key: os.environ/SILICONFLOW_API_KEY
|
||||
|
||||
# ═══════════════════════════════════════════════
|
||||
# TIER 3: DeepInfra (good mid-range pricing)
|
||||
# ═══════════════════════════════════════════════
|
||||
|
||||
# DeepSeek V3 fallback (if SiliconFlow is down)
|
||||
- model_name: deepseek-v3
|
||||
litellm_params:
|
||||
model: deepinfra/deepseek-ai/DeepSeek-V3-0324
|
||||
api_key: os.environ/DEEPINFRA_API_KEY
|
||||
|
||||
- model_name: deepseek-r1
|
||||
litellm_params:
|
||||
model: deepinfra/deepseek-ai/DeepSeek-R1
|
||||
api_key: os.environ/DEEPINFRA_API_KEY
|
||||
|
||||
- model_name: devstral
|
||||
litellm_params:
|
||||
model: deepinfra/mistralai/Devstral-Small-2505
|
||||
api_key: os.environ/DEEPINFRA_API_KEY
|
||||
|
||||
# ═══════════════════════════════════════════════
|
||||
# TIER 4: OpenRouter (most expensive, widest selection)
|
||||
# ═══════════════════════════════════════════════
|
||||
|
||||
# Kimi K2.5 — DeepInfra is cheapest ($0.45 in / $2.25 out per M)
|
||||
- model_name: kimi-k2.5
|
||||
litellm_params:
|
||||
model: deepinfra/moonshotai/Kimi-K2.5
|
||||
api_key: os.environ/DEEPINFRA_API_KEY
|
||||
|
||||
# Kimi K2.5 fallback via OpenRouter
|
||||
- model_name: kimi-k2.5
|
||||
litellm_params:
|
||||
model: openrouter/moonshotai/kimi-k2.5
|
||||
api_key: os.environ/OPENROUTER_API_KEY
|
||||
|
||||
- model_name: devstral
  litellm_params:
    model: openrouter/mistralai/devstral-small
    api_key: os.environ/OPENROUTER_API_KEY
|
||||
|
||||
- model_name: minimax-m2
  litellm_params:
    # NOTE(review): this entry carried duplicate `model:` keys
    # (minimax-m1 and minimax-m2.5); YAML last-wins semantics kept
    # minimax-m2.5 — confirm which model this alias should route to.
    model: openrouter/minimax/minimax-m2.5
    api_key: os.environ/OPENROUTER_API_KEY
|
||||
|
||||
- model_name: gpt-oss
|
||||
|
|
@ -20,7 +106,17 @@ model_list:
|
|||
model: openrouter/openai/gpt-4.1-mini
|
||||
api_key: os.environ/OPENROUTER_API_KEY
|
||||
|
||||
# --- OpenRouter models (writing) ---
|
||||
- model_name: gemini-3-flash-preview
|
||||
litellm_params:
|
||||
model: openrouter/google/gemini-3-flash-preview
|
||||
api_key: os.environ/OPENROUTER_API_KEY
|
||||
|
||||
- model_name: trinity-large-preview
|
||||
litellm_params:
|
||||
model: openrouter/arcee-ai/trinity-large-preview
|
||||
api_key: os.environ/OPENROUTER_API_KEY
|
||||
|
||||
# --- OpenRouter premium models ---
|
||||
- model_name: gemini-2.5-pro
|
||||
litellm_params:
|
||||
model: openrouter/google/gemini-2.5-pro-preview
|
||||
|
|
@ -36,66 +132,29 @@ model_list:
|
|||
model: openrouter/openai/gpt-4.1
|
||||
api_key: os.environ/OPENROUTER_API_KEY
|
||||
|
||||
# DeepSeek V3 last-resort fallback via OpenRouter
# (the :free variant is already exposed separately as `deepseek-v3-free`;
# the duplicate `model:` key was removed — last-wins kept the paid id)
- model_name: deepseek-v3
  litellm_params:
    model: openrouter/deepseek/deepseek-chat-v3-0324
    api_key: os.environ/OPENROUTER_API_KEY
|
||||
|
||||
# --- SiliconFlow models ---
|
||||
- model_name: glm-4.7
|
||||
litellm_params:
|
||||
model: openai/THUDM/GLM-4-32B-0414
|
||||
api_base: https://api.siliconflow.com/v1
|
||||
api_key: os.environ/SILICONFLOW_API_KEY
|
||||
|
||||
- model_name: qwen3-coder
|
||||
litellm_params:
|
||||
model: openrouter/qwen/qwen3-coder
|
||||
api_key: os.environ/OPENROUTER_API_KEY
|
||||
|
||||
- model_name: qwen3-coder-480b-sf
|
||||
litellm_params:
|
||||
model: openai/Qwen/Qwen3-Coder-480B-A35B-Instruct
|
||||
api_base: https://api.siliconflow.com/v1
|
||||
api_key: os.environ/SILICONFLOW_API_KEY
|
||||
|
||||
- model_name: qwen3-coder-30b-sf
|
||||
litellm_params:
|
||||
model: openai/Qwen/Qwen3-Coder-30B-A3B-Instruct
|
||||
api_base: https://api.siliconflow.com/v1
|
||||
api_key: os.environ/SILICONFLOW_API_KEY
|
||||
|
||||
# --- DeepInfra models ---
|
||||
- model_name: deepseek-v3.2
|
||||
litellm_params:
|
||||
model: deepinfra/deepseek-ai/DeepSeek-V3-0324
|
||||
api_key: os.environ/DEEPINFRA_API_KEY
|
||||
|
||||
- model_name: devstral-deepinfra
|
||||
litellm_params:
|
||||
model: deepinfra/mistralai/Devstral-Small-2505
|
||||
api_key: os.environ/DEEPINFRA_API_KEY
|
||||
|
||||
- model_name: deepseek-r1
|
||||
litellm_params:
|
||||
model: deepinfra/deepseek-ai/DeepSeek-R1
|
||||
api_key: os.environ/DEEPINFRA_API_KEY
|
||||
|
||||
# --- Groq (free/fast) ---
|
||||
- model_name: llama-3.3-70b
|
||||
litellm_params:
|
||||
model: groq/llama-3.3-70b-versatile
|
||||
api_key: os.environ/GROQ_API_KEY
|
||||
|
||||
# --- Cerebras (free/fast) ---
|
||||
- model_name: llama-3.3-70b-cerebras
|
||||
litellm_params:
|
||||
model: cerebras/llama-3.3-70b
|
||||
api_key: os.environ/CEREBRAS_API_KEY
|
||||
|
||||
general_settings:
|
||||
master_key: os.environ/LITELLM_MASTER_KEY
|
||||
|
||||
litellm_settings:
|
||||
drop_params: true
|
||||
set_verbose: false
|
||||
num_retries: 2
|
||||
request_timeout: 600
|
||||
|
||||
# ── Response caching via Valkey (reuses SearXNG's instance) ──
|
||||
cache: true
|
||||
cache_params:
|
||||
type: redis
|
||||
host: valkey
|
||||
port: 6379
|
||||
ttl: 3600
|
||||
|
||||
# ── Budget limit: $3/day to prevent surprise bills ──
|
||||
max_budget: 3.0
|
||||
budget_duration: "1d"
|
||||
|
|
|
|||
Loading…
Reference in a new issue