Patch summary for llm-gateway.yaml (explanatory text placed before the
"diff --git" header; it is not one of the patch's hunks):
  * Tier 1: the twelve OpenRouter ":free" model entries are commented out
    rather than deleted, with a note on how to re-enable them.
  * Tier 2: adds a Cerebras route to "llama-3.3-70b", points its DeepInfra
    route at the "-Turbo" model id, and adds the "llama-4-scout",
    "llama-4-maverick" and "qwen3-32b" entries.
  * DeepSeek R1 / Devstral: "deepseek-r1" moves to the R1-0528 DeepInfra id and
    drops its pricing; the Qwen-32B distill entry becomes the Llama-70B
    distill; "devstral" becomes "devstral-small" and "devstral-medium", both
    routed through OpenRouter.
  * Tier 7: adds a Groq route ahead of the DeepInfra route for "kimi-k2".
NOTE(review): line breaks and indentation were reconstructed (2-space YAML
indent assumed); line counts match every hunk header, but confirm the
indentation against the target file before applying.

diff --git a/llm-gateway.yaml b/llm-gateway.yaml
index 38d97a0..c9af852 100644
--- a/llm-gateway.yaml
+++ b/llm-gateway.yaml
@@ -59,65 +59,56 @@ providers:
 
 models:
   # ═══ TIER 1: Free (OpenRouter free models, $0) ═══
-  - name: "llama-3.3-70b-free"
-    routes:
-      - provider: openrouter
-        model: "meta-llama/llama-3.3-70b-instruct:free"
-
-  - name: "deepseek-r1-free"
-    routes:
-      - provider: openrouter
-        model: "deepseek/deepseek-r1-0528:free"
-
-  - name: "gpt-oss-free"
-    routes:
-      - provider: openrouter
-        model: "openai/gpt-oss-120b:free"
-
-  - name: "gpt-oss-20b-free"
-    routes:
-      - provider: openrouter
-        model: "openai/gpt-oss-20b:free"
-
-  - name: "qwen3-coder-free"
-    routes:
-      - provider: openrouter
-        model: "qwen/qwen3-coder:free"
-
-  - name: "qwen3-235b-free"
-    routes:
-      - provider: openrouter
-        model: "qwen/qwen3-235b-a22b-thinking-2507"
-
-  - name: "glm-4.5-air-free"
-    routes:
-      - provider: openrouter
-        model: "z-ai/glm-4.5-air:free"
-
-  - name: "nemotron-nano-free"
-    routes:
-      - provider: openrouter
-        model: "nvidia/nemotron-nano-9b-v2:free"
-
-  - name: "trinity-large-free"
-    routes:
-      - provider: openrouter
-        model: "arcee-ai/trinity-large-preview:free"
-
-  - name: "mistral-small-free"
-    routes:
-      - provider: openrouter
-        model: "mistralai/mistral-small-3.1-24b-instruct:free"
-
-  - name: "gemma-3-27b-free"
-    routes:
-      - provider: openrouter
-        model: "google/gemma-3-27b-it:free"
-
-  - name: "step-3.5-flash-free"
-    routes:
-      - provider: openrouter
-        model: "stepfun/step-3.5-flash:free"
+  # NOTE: Commented out — free models are heavily rate-limited upstream.
+  # Uncomment if you want best-effort free access.
+  # - name: "llama-3.3-70b-free"
+  #   routes:
+  #     - provider: openrouter
+  #       model: "meta-llama/llama-3.3-70b-instruct:free"
+  # - name: "deepseek-r1-free"
+  #   routes:
+  #     - provider: openrouter
+  #       model: "deepseek/deepseek-r1-0528:free"
+  # - name: "gpt-oss-free"
+  #   routes:
+  #     - provider: openrouter
+  #       model: "openai/gpt-oss-120b:free"
+  # - name: "gpt-oss-20b-free"
+  #   routes:
+  #     - provider: openrouter
+  #       model: "openai/gpt-oss-20b:free"
+  # - name: "qwen3-coder-free"
+  #   routes:
+  #     - provider: openrouter
+  #       model: "qwen/qwen3-coder:free"
+  # - name: "qwen3-235b-free"
+  #   routes:
+  #     - provider: openrouter
+  #       model: "qwen/qwen3-235b-a22b-thinking-2507"
+  # - name: "glm-4.5-air-free"
+  #   routes:
+  #     - provider: openrouter
+  #       model: "z-ai/glm-4.5-air:free"
+  # - name: "nemotron-nano-free"
+  #   routes:
+  #     - provider: openrouter
+  #       model: "nvidia/nemotron-nano-9b-v2:free"
+  # - name: "trinity-large-free"
+  #   routes:
+  #     - provider: openrouter
+  #       model: "arcee-ai/trinity-large-preview:free"
+  # - name: "mistral-small-free"
+  #   routes:
+  #     - provider: openrouter
+  #       model: "mistralai/mistral-small-3.1-24b-instruct:free"
+  # - name: "gemma-3-27b-free"
+  #   routes:
+  #     - provider: openrouter
+  #       model: "google/gemma-3-27b-it:free"
+  # - name: "step-3.5-flash-free"
+  #   routes:
+  #     - provider: openrouter
+  #       model: "stepfun/step-3.5-flash:free"
 
   # ═══ TIER 2: Low cost (Groq, Cerebras — free tier with rate limits) ═══
   - name: "llama-3.3-70b"
@@ -125,8 +116,11 @@ models:
       - provider: groq
         model: "llama-3.3-70b-versatile"
         pricing: { input: 0.59, output: 0.79 }
+      - provider: cerebras
+        model: "llama-3.3-70b"
+        pricing: { input: 0.85, output: 1.20 }
       - provider: deepinfra
-        model: "meta-llama/Llama-3.3-70B-Instruct"
+        model: "meta-llama/Llama-3.3-70B-Instruct-Turbo"
         pricing: { input: 0.23, output: 0.40 }
 
   - name: "llama-3.1-8b"
@@ -162,6 +156,26 @@ models:
         model: "openai/gpt-oss-20b"
         pricing: { input: 0.04, output: 0.16 }
 
+  - name: "llama-4-scout"
+    routes:
+      - provider: groq
+        model: "meta-llama/llama-4-scout-17b-16e-instruct"
+        pricing: { input: 0.11, output: 0.34 }
+
+  - name: "llama-4-maverick"
+    routes:
+      - provider: groq
+        model: "meta-llama/llama-4-maverick-17b-128e-instruct"
+        pricing: { input: 0.20, output: 0.60 }
+
+  - name: "qwen3-32b"
+    routes:
+      - provider: groq
+        model: "qwen/qwen3-32b"
+        pricing: { input: 0.29, output: 0.59 }
+      - provider: cerebras
+        model: "qwen-3-32b"
+
   # ═══ TIER 3: DeepSeek V3.2 (cheapest flagship) ═══
   - name: "deepseek-v3.2"
     routes:
@@ -192,22 +206,24 @@ models:
   - name: "deepseek-r1"
     routes:
       - provider: deepinfra
-        model: "deepseek-ai/DeepSeek-R1"
-        pricing: { input: 0.40, output: 1.60 }
+        model: "deepseek-ai/DeepSeek-R1-0528"
       - provider: openrouter
         model: "deepseek/deepseek-r1"
-        pricing: { input: 0.55, output: 2.19 }
 
-  - name: "deepseek-r1-distill-qwen-32b"
+  - name: "deepseek-r1-distill-llama-70b"
     routes:
       - provider: deepinfra
-        model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
-        pricing: { input: 0.07, output: 0.16 }
+        model: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
 
-  - name: "devstral"
+  - name: "devstral-small"
     routes:
-      - provider: deepinfra
-        model: "mistralai/Devstral-Small-2505"
+      - provider: openrouter
+        model: "mistralai/devstral-small"
+
+  - name: "devstral-medium"
+    routes:
+      - provider: openrouter
+        model: "mistralai/devstral-medium"
 
   # ═══ TIER 6: GLM ═══
   - name: "glm-4.6"
@@ -236,6 +252,9 @@ models:
   # ═══ TIER 7: Kimi ═══
   - name: "kimi-k2"
     routes:
+      - provider: groq
+        model: "moonshotai/kimi-k2-instruct-0905"
+        pricing: { input: 1.00, output: 3.00 }
       - provider: deepinfra
         model: "moonshotai/Kimi-K2-Instruct-0905"
         pricing: { input: 0.50, output: 2.00 }