diff --git a/litellm/config.yaml b/litellm/config.yaml index a7a78ec..23bb05f 100644 --- a/litellm/config.yaml +++ b/litellm/config.yaml @@ -50,7 +50,35 @@ model_list: api_key: os.environ/SILICONFLOW_API_KEY # ═══════════════════════════════════════════════ - # TIER 3: Other DeepInfra models + # TIER 3: Ultra-cheap DeepInfra models + # ═══════════════════════════════════════════════ + + # GPT-OSS-120B — OpenAI open-weight MoE ($0.05 in / $0.24 out per M) + - model_name: gpt-oss + litellm_params: + model: deepinfra/openai/gpt-oss-120b + api_key: os.environ/DEEPINFRA_API_KEY + + # GPT-OSS-20B — lower latency variant ($0.04 in / $0.16 out per M) + - model_name: gpt-oss-20b + litellm_params: + model: deepinfra/openai/gpt-oss-20b + api_key: os.environ/DEEPINFRA_API_KEY + + # Nemotron Super 49B — near-flagship quality ($0.10 in / $0.40 out per M) + - model_name: nemotron-super + litellm_params: + model: deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5 + api_key: os.environ/DEEPINFRA_API_KEY + + # Nemotron Nano 9B — dirt cheap for simple tasks ($0.04 in / $0.16 out per M) + - model_name: nemotron-nano + litellm_params: + model: deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2 + api_key: os.environ/DEEPINFRA_API_KEY + + # ═══════════════════════════════════════════════ + # TIER 4: Other DeepInfra models # ═══════════════════════════════════════════════ - model_name: deepseek-r1 @@ -64,16 +92,55 @@ model_list: api_key: os.environ/DEEPINFRA_API_KEY # ═══════════════════════════════════════════════ - # TIER 4: SiliconFlow (Qwen/GLM) + # TIER 5: GLM models (cheapest first) # ═══════════════════════════════════════════════ - # GLM-4.7 via SiliconFlow + # GLM-4.6 via DeepInfra ($0.60 in / $1.90 out per M) + - model_name: glm-4.6 + litellm_params: + model: deepinfra/zai-org/GLM-4.6 + api_key: os.environ/DEEPINFRA_API_KEY + + # GLM-4.7 via DeepInfra ($0.40 in / $1.75 out per M) + - model_name: glm-4.7 + litellm_params: + model: deepinfra/zai-org/GLM-4.7 + api_key: os.environ/DEEPINFRA_API_KEY
+ + # GLM-4.7 fallback via SiliconFlow - model_name: glm-4.7 litellm_params: model: openai/THUDM/GLM-4-32B-0414 api_base: https://api.siliconflow.com/v1 api_key: os.environ/SILICONFLOW_API_KEY + # GLM-5 via DeepInfra ($0.80 in / $2.56 out per M) + - model_name: glm-5 + litellm_params: + model: deepinfra/zai-org/GLM-5 + api_key: os.environ/DEEPINFRA_API_KEY + + # ═══════════════════════════════════════════════ + # TIER 6: Kimi K2 (cheapest first) + # ═══════════════════════════════════════════════ + + # Kimi K2 via DeepInfra ($0.50 in / $2.00 out per M) + - model_name: kimi-k2 + litellm_params: + model: deepinfra/moonshotai/Kimi-K2-Instruct-0905 + api_key: os.environ/DEEPINFRA_API_KEY + + # Kimi K2 fallback via SiliconFlow ($0.58 in / $2.29 out per M) + - model_name: kimi-k2 + litellm_params: + model: openai/moonshotai/Kimi-K2-Instruct-0905 + api_base: https://api.siliconflow.com/v1 + api_key: os.environ/SILICONFLOW_API_KEY + + # ═══════════════════════════════════════════════ + # TIER 7: SiliconFlow (Qwen) + # ═══════════════════════════════════════════════ + # Qwen3 Coder 480B MoE via SiliconFlow ($1.14 in / $2.28 out per M) - model_name: qwen3-coder litellm_params: @@ -89,7 +156,7 @@ model_list: api_key: os.environ/SILICONFLOW_API_KEY # ═══════════════════════════════════════════════ - # TIER 5: OpenRouter (most expensive, widest selection) + # TIER 8: OpenRouter (most expensive, widest selection) # ═══════════════════════════════════════════════ # Kimi K2.5 — DeepInfra is cheapest ($0.45 in / $2.25 out per M) @@ -109,7 +176,7 @@ model_list: model: openrouter/minimax/minimax-m2.5 api_key: os.environ/OPENROUTER_API_KEY - - model_name: gpt-oss + - model_name: gpt-4.1-mini litellm_params: model: openrouter/openai/gpt-4.1-mini api_key: os.environ/OPENROUTER_API_KEY