diff --git a/llm-gateway.yaml b/llm-gateway.yaml
index 9bd4c13..0825e23 100644
--- a/llm-gateway.yaml
+++ b/llm-gateway.yaml
@@ -58,7 +58,91 @@ providers:
     timeout: 120s
 
 models:
-  # ── DeepSeek V3.2 ──
+  # ═══ TIER 1: Free (OpenRouter free models, $0) ═══
+  - name: "deepseek-v3-free"
+    routes:
+      - provider: openrouter
+        model: "deepseek/deepseek-v3.2-20251201:free"
+
+  - name: "gemini-2.5-flash-free"
+    routes:
+      - provider: openrouter
+        model: "google/gemini-2.5-flash:free"
+
+  - name: "gemini-2.0-flash-free"
+    routes:
+      - provider: openrouter
+        model: "google/gemini-2.0-flash-001:free"
+
+  - name: "gpt-oss-free"
+    routes:
+      - provider: openrouter
+        model: "openai/gpt-oss-120b:free"
+
+  - name: "gpt-5-nano-free"
+    routes:
+      - provider: openrouter
+        model: "openai/gpt-5-nano-2025-08-07:free"
+
+  - name: "grok-4.1-fast-free"
+    routes:
+      - provider: openrouter
+        model: "x-ai/grok-4.1-fast:free"
+
+  - name: "trinity-large-free"
+    routes:
+      - provider: openrouter
+        model: "arcee-ai/trinity-large-preview:free"
+
+  - name: "kimi-k2.5-free"
+    routes:
+      - provider: openrouter
+        model: "moonshotai/kimi-k2.5-0127:free"
+
+  # ═══ TIER 2: Low cost (Groq, Cerebras — free tier with rate limits) ═══
+  - name: "llama-3.3-70b"
+    routes:
+      - provider: groq
+        model: "llama-3.3-70b-versatile"
+        pricing: { input: 0.59, output: 0.79 }
+      - provider: deepinfra
+        model: "meta-llama/Llama-3.3-70B-Instruct"
+        pricing: { input: 0.23, output: 0.40 }
+
+  - name: "llama-3.1-8b"
+    routes:
+      - provider: groq
+        model: "llama-3.1-8b-instant"
+        pricing: { input: 0.05, output: 0.08 }
+      - provider: cerebras
+        model: "llama3.1-8b"
+        pricing: { input: 0.10, output: 0.10 }
+      - provider: deepinfra
+        model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+        pricing: { input: 0.03, output: 0.05 }
+
+  - name: "gpt-oss"
+    routes:
+      - provider: groq
+        model: "openai/gpt-oss-120b"
+        pricing: { input: 0.15, output: 0.60 }
+      - provider: cerebras
+        model: "gpt-oss-120b"
+        pricing: { input: 0.35, output: 0.75 }
+      - provider: deepinfra
+        model: "openai/gpt-oss-120b"
+        pricing: { input: 0.05, output: 0.24 }
+
+  - name: "gpt-oss-20b"
+    routes:
+      - provider: groq
+        model: "openai/gpt-oss-20b"
+        pricing: { input: 0.075, output: 0.30 }
+      - provider: deepinfra
+        model: "openai/gpt-oss-20b"
+        pricing: { input: 0.04, output: 0.16 }
+
+  # ═══ TIER 3: DeepSeek V3.2 (cheapest flagship) ═══
   - name: "deepseek-v3.2"
     routes:
       - provider: deepinfra
@@ -71,30 +155,7 @@ models:
         model: "deepseek/deepseek-chat-v3-0324"
         pricing: { input: 0.30, output: 0.88 }
 
-  # ── DeepSeek R1 ──
-  - name: "deepseek-r1"
-    routes:
-      - provider: deepinfra
-        model: "deepseek-ai/DeepSeek-R1"
-        pricing: { input: 0.40, output: 1.60 }
-      - provider: openrouter
-        model: "deepseek/deepseek-r1"
-        pricing: { input: 0.55, output: 2.19 }
-
-  # ── GPT-OSS (OpenAI open-weight MoE) ──
-  - name: "gpt-oss"
-    routes:
-      - provider: deepinfra
-        model: "openai/gpt-oss-120b"
-        pricing: { input: 0.05, output: 0.24 }
-
-  - name: "gpt-oss-20b"
-    routes:
-      - provider: deepinfra
-        model: "openai/gpt-oss-20b"
-        pricing: { input: 0.04, output: 0.16 }
-
-  # ── Nemotron ──
+  # ═══ TIER 4: Ultra-cheap DeepInfra ═══
   - name: "nemotron-super"
     routes:
       - provider: deepinfra
@@ -107,13 +168,28 @@ models:
         model: "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
         pricing: { input: 0.04, output: 0.16 }
 
-  # ── Devstral ──
+  # ═══ TIER 5: DeepSeek R1 & reasoning ═══
+  - name: "deepseek-r1"
+    routes:
+      - provider: deepinfra
+        model: "deepseek-ai/DeepSeek-R1"
+        pricing: { input: 0.40, output: 1.60 }
+      - provider: openrouter
+        model: "deepseek/deepseek-r1"
+        pricing: { input: 0.55, output: 2.19 }
+
+  - name: "deepseek-r1-distill-qwen-32b"
+    routes:
+      - provider: deepinfra
+        model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
+        pricing: { input: 0.07, output: 0.16 }
+
   - name: "devstral"
     routes:
       - provider: deepinfra
         model: "mistralai/Devstral-Small-2505"
 
-  # ── GLM ──
+  # ═══ TIER 6: GLM ═══
   - name: "glm-4.6"
     routes:
       - provider: deepinfra
@@ -125,6 +201,9 @@ models:
       - provider: deepinfra
         model: "zai-org/GLM-4.7"
         pricing: { input: 0.40, output: 1.75 }
+      - provider: cerebras
+        model: "zai-glm-4.7"
+        pricing: { input: 2.25, output: 2.75 }
       - provider: siliconflow
         model: "THUDM/GLM-4-32B-0414"
 
@@ -134,7 +213,7 @@ models:
         model: "zai-org/GLM-5"
         pricing: { input: 0.80, output: 2.56 }
 
-  # ── Kimi ──
+  # ═══ TIER 7: Kimi ═══
   - name: "kimi-k2"
     routes:
       - provider: deepinfra
@@ -152,7 +231,7 @@ models:
       - provider: openrouter
         model: "moonshotai/kimi-k2.5"
 
-  # ── Qwen3 Coder ──
+  # ═══ TIER 8: SiliconFlow (Qwen) ═══
   - name: "qwen3-coder"
     routes:
       - provider: siliconflow
@@ -164,70 +243,7 @@ models:
       - provider: siliconflow
         model: "Qwen/Qwen3-Coder-30B-A3B-Instruct"
 
-  # ── Llama ──
-  - name: "llama-3.3-70b"
-    routes:
-      - provider: groq
-        model: "llama-3.3-70b-versatile"
-      - provider: cerebras
-        model: "llama-3.3-70b"
-      - provider: deepinfra
-        model: "meta-llama/Llama-3.3-70B-Instruct"
-        pricing: { input: 0.23, output: 0.40 }
-
-  - name: "llama-3.1-8b"
-    routes:
-      - provider: groq
-        model: "llama-3.1-8b-instant"
-      - provider: cerebras
-        model: "llama-3.1-8b"
-      - provider: deepinfra
-        model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-        pricing: { input: 0.03, output: 0.05 }
-
-  # ── Qwen 2.5 ──
-  - name: "qwen-2.5-72b"
-    routes:
-      - provider: groq
-        model: "qwen-2.5-72b"
-      - provider: deepinfra
-        model: "Qwen/Qwen2.5-72B-Instruct"
-        pricing: { input: 0.23, output: 0.40 }
-
-  - name: "qwen-2.5-coder-32b"
-    routes:
-      - provider: groq
-        model: "qwen-2.5-coder-32b"
-      - provider: deepinfra
-        model: "Qwen/Qwen2.5-Coder-32B-Instruct"
-        pricing: { input: 0.07, output: 0.16 }
-
-  # ── Other ──
-  - name: "gemma-2-9b"
-    routes:
-      - provider: groq
-        model: "gemma2-9b-it"
-
-  - name: "deepseek-r1-distill-llama-70b"
-    routes:
-      - provider: groq
-        model: "deepseek-r1-distill-llama-70b"
-      - provider: deepinfra
-        model: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
-        pricing: { input: 0.23, output: 0.69 }
-
-  - name: "deepseek-r1-distill-qwen-32b"
-    routes:
-      - provider: deepinfra
-        model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
-        pricing: { input: 0.07, output: 0.16 }
-
-  # ── OpenRouter premium ──
-  - name: "deepseek-v3-free"
-    routes:
-      - provider: openrouter
-        model: "deepseek/deepseek-chat-v3-0324:free"
-
+  # ═══ TIER 9: OpenRouter premium (paid) ═══
   - name: "minimax-m2.5"
     routes:
       - provider: openrouter
@@ -257,8 +273,3 @@ models:
     routes:
       - provider: openrouter
         model: "anthropic/claude-sonnet-4"
-
-  - name: "trinity-large-preview"
-    routes:
-      - provider: openrouter
-        model: "arcee-ai/trinity-large-preview"