feat: add new DeepInfra and SiliconFlow models to model list

This commit is contained in:
Ray Andrew 2026-02-14 02:42:49 -06:00
parent 58db4feaee
commit baf502108b
Signed by: rayandrew
SSH key fingerprint: SHA256:EUCV+qCSqkap8rR+p+zGjxHfKI06G0GJKgo1DIOniQY

View file

@@ -50,7 +50,35 @@ model_list:
api_key: os.environ/SILICONFLOW_API_KEY
# ═══════════════════════════════════════════════
# TIER 3: Other DeepInfra models
# TIER 3: Ultra-cheap DeepInfra models
# ═══════════════════════════════════════════════
# GPT-OSS-120B — OpenAI open-weight MoE ($0.05 in / $0.24 out per M)
- model_name: gpt-oss
litellm_params:
model: deepinfra/openai/gpt-oss-120b
api_key: os.environ/DEEPINFRA_API_KEY
# GPT-OSS-20B — lower latency variant ($0.04 in / $0.16 out per M)
- model_name: gpt-oss-20b
litellm_params:
model: deepinfra/openai/gpt-oss-20b
api_key: os.environ/DEEPINFRA_API_KEY
# Nemotron Super 49B — near-flagship quality ($0.10 in / $0.40 out per M)
- model_name: nemotron-super
litellm_params:
model: deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5
api_key: os.environ/DEEPINFRA_API_KEY
# Nemotron Nano 9B — dirt cheap for simple tasks ($0.04 in / $0.16 out per M)
- model_name: nemotron-nano
litellm_params:
model: deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2
api_key: os.environ/DEEPINFRA_API_KEY
# ═══════════════════════════════════════════════
# TIER 4: Other DeepInfra models
# ═══════════════════════════════════════════════
- model_name: deepseek-r1
@@ -64,16 +92,55 @@ model_list:
api_key: os.environ/DEEPINFRA_API_KEY
# ═══════════════════════════════════════════════
# TIER 4: SiliconFlow (Qwen/GLM)
# TIER 5: GLM models (cheapest first)
# ═══════════════════════════════════════════════
# GLM-4.7 via SiliconFlow
# GLM-4.6 via DeepInfra ($0.60 in / $1.90 out per M)
- model_name: glm-4.6
litellm_params:
model: deepinfra/zai-org/GLM-4.6
api_key: os.environ/DEEPINFRA_API_KEY
# GLM-4.7 via DeepInfra ($0.40 in / $1.75 out per M)
- model_name: glm-4.7
litellm_params:
model: deepinfra/zai-org/GLM-4.7
api_key: os.environ/DEEPINFRA_API_KEY
# GLM-4 fallback via SiliconFlow (serves GLM-4-32B-0414, not GLM-4.7 — confirm intended fallback model)
- model_name: glm-4.7
litellm_params:
model: openai/THUDM/GLM-4-32B-0414
api_base: https://api.siliconflow.com/v1
api_key: os.environ/SILICONFLOW_API_KEY
# GLM-5 via DeepInfra ($0.80 in / $2.56 out per M)
- model_name: glm-5
litellm_params:
model: deepinfra/zai-org/GLM-5
api_key: os.environ/DEEPINFRA_API_KEY
# ═══════════════════════════════════════════════
# TIER 6: Kimi K2 (cheapest first)
# ═══════════════════════════════════════════════
# Kimi K2 via DeepInfra ($0.50 in / $2.00 out per M)
- model_name: kimi-k2
litellm_params:
model: deepinfra/moonshotai/Kimi-K2-Instruct-0905
api_key: os.environ/DEEPINFRA_API_KEY
# Kimi K2 fallback via SiliconFlow ($0.58 in / $2.29 out per M)
- model_name: kimi-k2
litellm_params:
model: openai/moonshotai/Kimi-K2-Instruct-0905
api_base: https://api.siliconflow.com/v1
api_key: os.environ/SILICONFLOW_API_KEY
# ═══════════════════════════════════════════════
# TIER 7: SiliconFlow (Qwen)
# ═══════════════════════════════════════════════
# Qwen3 Coder 480B MoE via SiliconFlow ($1.14 in / $2.28 out per M)
- model_name: qwen3-coder
litellm_params:
@@ -89,7 +156,7 @@ model_list:
api_key: os.environ/SILICONFLOW_API_KEY
# ═══════════════════════════════════════════════
# TIER 5: OpenRouter (most expensive, widest selection)
# TIER 8: OpenRouter (most expensive, widest selection)
# ═══════════════════════════════════════════════
# Kimi K2.5 — DeepInfra is cheapest ($0.45 in / $2.25 out per M)
@@ -109,7 +176,7 @@ model_list:
model: openrouter/minimax/minimax-m2.5
api_key: os.environ/OPENROUTER_API_KEY
- model_name: gpt-oss
- model_name: gpt-4.1-mini
litellm_params:
model: openrouter/openai/gpt-4.1-mini
api_key: os.environ/OPENROUTER_API_KEY