feat(gateway): update llm-gateway.yaml
This commit is contained in:
parent
8b741e58c7
commit
029b515651
1 changed files with 109 additions and 98 deletions
207
llm-gateway.yaml
207
llm-gateway.yaml
|
|
@ -58,7 +58,91 @@ providers:
|
||||||
timeout: 120s
|
timeout: 120s
|
||||||
|
|
||||||
models:
|
models:
|
||||||
# ── DeepSeek V3.2 ──
|
# ═══ TIER 1: Free (OpenRouter free models, $0) ═══
|
||||||
|
- name: "deepseek-v3-free"
|
||||||
|
routes:
|
||||||
|
- provider: openrouter
|
||||||
|
model: "deepseek/deepseek-v3.2-20251201:free"
|
||||||
|
|
||||||
|
- name: "gemini-2.5-flash-free"
|
||||||
|
routes:
|
||||||
|
- provider: openrouter
|
||||||
|
model: "google/gemini-2.5-flash:free"
|
||||||
|
|
||||||
|
- name: "gemini-2.0-flash-free"
|
||||||
|
routes:
|
||||||
|
- provider: openrouter
|
||||||
|
model: "google/gemini-2.0-flash-001:free"
|
||||||
|
|
||||||
|
- name: "gpt-oss-free"
|
||||||
|
routes:
|
||||||
|
- provider: openrouter
|
||||||
|
model: "openai/gpt-oss-120b:free"
|
||||||
|
|
||||||
|
- name: "gpt-5-nano-free"
|
||||||
|
routes:
|
||||||
|
- provider: openrouter
|
||||||
|
model: "openai/gpt-5-nano-2025-08-07:free"
|
||||||
|
|
||||||
|
- name: "grok-4.1-fast-free"
|
||||||
|
routes:
|
||||||
|
- provider: openrouter
|
||||||
|
model: "x-ai/grok-4.1-fast:free"
|
||||||
|
|
||||||
|
- name: "trinity-large-free"
|
||||||
|
routes:
|
||||||
|
- provider: openrouter
|
||||||
|
model: "arcee-ai/trinity-large-preview:free"
|
||||||
|
|
||||||
|
- name: "kimi-k2.5-free"
|
||||||
|
routes:
|
||||||
|
- provider: openrouter
|
||||||
|
model: "moonshotai/kimi-k2.5-0127:free"
|
||||||
|
|
||||||
|
# ═══ TIER 2: Low cost (Groq, Cerebras — free tier with rate limits) ═══
|
||||||
|
- name: "llama-3.3-70b"
|
||||||
|
routes:
|
||||||
|
- provider: groq
|
||||||
|
model: "llama-3.3-70b-versatile"
|
||||||
|
pricing: { input: 0.59, output: 0.79 }
|
||||||
|
- provider: deepinfra
|
||||||
|
model: "meta-llama/Llama-3.3-70B-Instruct"
|
||||||
|
pricing: { input: 0.23, output: 0.40 }
|
||||||
|
|
||||||
|
- name: "llama-3.1-8b"
|
||||||
|
routes:
|
||||||
|
- provider: groq
|
||||||
|
model: "llama-3.1-8b-instant"
|
||||||
|
pricing: { input: 0.05, output: 0.08 }
|
||||||
|
- provider: cerebras
|
||||||
|
model: "llama3.1-8b"
|
||||||
|
pricing: { input: 0.10, output: 0.10 }
|
||||||
|
- provider: deepinfra
|
||||||
|
model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
||||||
|
pricing: { input: 0.03, output: 0.05 }
|
||||||
|
|
||||||
|
- name: "gpt-oss"
|
||||||
|
routes:
|
||||||
|
- provider: groq
|
||||||
|
model: "openai/gpt-oss-120b"
|
||||||
|
pricing: { input: 0.15, output: 0.60 }
|
||||||
|
- provider: cerebras
|
||||||
|
model: "gpt-oss-120b"
|
||||||
|
pricing: { input: 0.35, output: 0.75 }
|
||||||
|
- provider: deepinfra
|
||||||
|
model: "openai/gpt-oss-120b"
|
||||||
|
pricing: { input: 0.05, output: 0.24 }
|
||||||
|
|
||||||
|
- name: "gpt-oss-20b"
|
||||||
|
routes:
|
||||||
|
- provider: groq
|
||||||
|
model: "openai/gpt-oss-20b"
|
||||||
|
pricing: { input: 0.075, output: 0.30 }
|
||||||
|
- provider: deepinfra
|
||||||
|
model: "openai/gpt-oss-20b"
|
||||||
|
pricing: { input: 0.04, output: 0.16 }
|
||||||
|
|
||||||
|
# ═══ TIER 3: DeepSeek V3.2 (cheapest flagship) ═══
|
||||||
- name: "deepseek-v3.2"
|
- name: "deepseek-v3.2"
|
||||||
routes:
|
routes:
|
||||||
- provider: deepinfra
|
- provider: deepinfra
|
||||||
|
|
@ -71,30 +155,7 @@ models:
|
||||||
model: "deepseek/deepseek-chat-v3-0324"
|
model: "deepseek/deepseek-chat-v3-0324"
|
||||||
pricing: { input: 0.30, output: 0.88 }
|
pricing: { input: 0.30, output: 0.88 }
|
||||||
|
|
||||||
# ── DeepSeek R1 ──
|
# ═══ TIER 4: Ultra-cheap DeepInfra ═══
|
||||||
- name: "deepseek-r1"
|
|
||||||
routes:
|
|
||||||
- provider: deepinfra
|
|
||||||
model: "deepseek-ai/DeepSeek-R1"
|
|
||||||
pricing: { input: 0.40, output: 1.60 }
|
|
||||||
- provider: openrouter
|
|
||||||
model: "deepseek/deepseek-r1"
|
|
||||||
pricing: { input: 0.55, output: 2.19 }
|
|
||||||
|
|
||||||
# ── GPT-OSS (OpenAI open-weight MoE) ──
|
|
||||||
- name: "gpt-oss"
|
|
||||||
routes:
|
|
||||||
- provider: deepinfra
|
|
||||||
model: "openai/gpt-oss-120b"
|
|
||||||
pricing: { input: 0.05, output: 0.24 }
|
|
||||||
|
|
||||||
- name: "gpt-oss-20b"
|
|
||||||
routes:
|
|
||||||
- provider: deepinfra
|
|
||||||
model: "openai/gpt-oss-20b"
|
|
||||||
pricing: { input: 0.04, output: 0.16 }
|
|
||||||
|
|
||||||
# ── Nemotron ──
|
|
||||||
- name: "nemotron-super"
|
- name: "nemotron-super"
|
||||||
routes:
|
routes:
|
||||||
- provider: deepinfra
|
- provider: deepinfra
|
||||||
|
|
@ -107,13 +168,28 @@ models:
|
||||||
model: "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
|
model: "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
|
||||||
pricing: { input: 0.04, output: 0.16 }
|
pricing: { input: 0.04, output: 0.16 }
|
||||||
|
|
||||||
# ── Devstral ──
|
# ═══ TIER 5: DeepSeek R1 & reasoning ═══
|
||||||
|
- name: "deepseek-r1"
|
||||||
|
routes:
|
||||||
|
- provider: deepinfra
|
||||||
|
model: "deepseek-ai/DeepSeek-R1"
|
||||||
|
pricing: { input: 0.40, output: 1.60 }
|
||||||
|
- provider: openrouter
|
||||||
|
model: "deepseek/deepseek-r1"
|
||||||
|
pricing: { input: 0.55, output: 2.19 }
|
||||||
|
|
||||||
|
- name: "deepseek-r1-distill-qwen-32b"
|
||||||
|
routes:
|
||||||
|
- provider: deepinfra
|
||||||
|
model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
|
||||||
|
pricing: { input: 0.07, output: 0.16 }
|
||||||
|
|
||||||
- name: "devstral"
|
- name: "devstral"
|
||||||
routes:
|
routes:
|
||||||
- provider: deepinfra
|
- provider: deepinfra
|
||||||
model: "mistralai/Devstral-Small-2505"
|
model: "mistralai/Devstral-Small-2505"
|
||||||
|
|
||||||
# ── GLM ──
|
# ═══ TIER 6: GLM ═══
|
||||||
- name: "glm-4.6"
|
- name: "glm-4.6"
|
||||||
routes:
|
routes:
|
||||||
- provider: deepinfra
|
- provider: deepinfra
|
||||||
|
|
@ -125,6 +201,9 @@ models:
|
||||||
- provider: deepinfra
|
- provider: deepinfra
|
||||||
model: "zai-org/GLM-4.7"
|
model: "zai-org/GLM-4.7"
|
||||||
pricing: { input: 0.40, output: 1.75 }
|
pricing: { input: 0.40, output: 1.75 }
|
||||||
|
- provider: cerebras
|
||||||
|
model: "zai-glm-4.7"
|
||||||
|
pricing: { input: 2.25, output: 2.75 }
|
||||||
- provider: siliconflow
|
- provider: siliconflow
|
||||||
model: "THUDM/GLM-4-32B-0414"
|
model: "THUDM/GLM-4-32B-0414"
|
||||||
|
|
||||||
|
|
@ -134,7 +213,7 @@ models:
|
||||||
model: "zai-org/GLM-5"
|
model: "zai-org/GLM-5"
|
||||||
pricing: { input: 0.80, output: 2.56 }
|
pricing: { input: 0.80, output: 2.56 }
|
||||||
|
|
||||||
# ── Kimi ──
|
# ═══ TIER 7: Kimi ═══
|
||||||
- name: "kimi-k2"
|
- name: "kimi-k2"
|
||||||
routes:
|
routes:
|
||||||
- provider: deepinfra
|
- provider: deepinfra
|
||||||
|
|
@ -152,7 +231,7 @@ models:
|
||||||
- provider: openrouter
|
- provider: openrouter
|
||||||
model: "moonshotai/kimi-k2.5"
|
model: "moonshotai/kimi-k2.5"
|
||||||
|
|
||||||
# ── Qwen3 Coder ──
|
# ═══ TIER 8: SiliconFlow (Qwen) ═══
|
||||||
- name: "qwen3-coder"
|
- name: "qwen3-coder"
|
||||||
routes:
|
routes:
|
||||||
- provider: siliconflow
|
- provider: siliconflow
|
||||||
|
|
@ -164,70 +243,7 @@ models:
|
||||||
- provider: siliconflow
|
- provider: siliconflow
|
||||||
model: "Qwen/Qwen3-Coder-30B-A3B-Instruct"
|
model: "Qwen/Qwen3-Coder-30B-A3B-Instruct"
|
||||||
|
|
||||||
# ── Llama ──
|
# ═══ TIER 9: OpenRouter premium (paid) ═══
|
||||||
- name: "llama-3.3-70b"
|
|
||||||
routes:
|
|
||||||
- provider: groq
|
|
||||||
model: "llama-3.3-70b-versatile"
|
|
||||||
- provider: cerebras
|
|
||||||
model: "llama-3.3-70b"
|
|
||||||
- provider: deepinfra
|
|
||||||
model: "meta-llama/Llama-3.3-70B-Instruct"
|
|
||||||
pricing: { input: 0.23, output: 0.40 }
|
|
||||||
|
|
||||||
- name: "llama-3.1-8b"
|
|
||||||
routes:
|
|
||||||
- provider: groq
|
|
||||||
model: "llama-3.1-8b-instant"
|
|
||||||
- provider: cerebras
|
|
||||||
model: "llama-3.1-8b"
|
|
||||||
- provider: deepinfra
|
|
||||||
model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
|
||||||
pricing: { input: 0.03, output: 0.05 }
|
|
||||||
|
|
||||||
# ── Qwen 2.5 ──
|
|
||||||
- name: "qwen-2.5-72b"
|
|
||||||
routes:
|
|
||||||
- provider: groq
|
|
||||||
model: "qwen-2.5-72b"
|
|
||||||
- provider: deepinfra
|
|
||||||
model: "Qwen/Qwen2.5-72B-Instruct"
|
|
||||||
pricing: { input: 0.23, output: 0.40 }
|
|
||||||
|
|
||||||
- name: "qwen-2.5-coder-32b"
|
|
||||||
routes:
|
|
||||||
- provider: groq
|
|
||||||
model: "qwen-2.5-coder-32b"
|
|
||||||
- provider: deepinfra
|
|
||||||
model: "Qwen/Qwen2.5-Coder-32B-Instruct"
|
|
||||||
pricing: { input: 0.07, output: 0.16 }
|
|
||||||
|
|
||||||
# ── Other ──
|
|
||||||
- name: "gemma-2-9b"
|
|
||||||
routes:
|
|
||||||
- provider: groq
|
|
||||||
model: "gemma2-9b-it"
|
|
||||||
|
|
||||||
- name: "deepseek-r1-distill-llama-70b"
|
|
||||||
routes:
|
|
||||||
- provider: groq
|
|
||||||
model: "deepseek-r1-distill-llama-70b"
|
|
||||||
- provider: deepinfra
|
|
||||||
model: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
|
|
||||||
pricing: { input: 0.23, output: 0.69 }
|
|
||||||
|
|
||||||
- name: "deepseek-r1-distill-qwen-32b"
|
|
||||||
routes:
|
|
||||||
- provider: deepinfra
|
|
||||||
model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
|
|
||||||
pricing: { input: 0.07, output: 0.16 }
|
|
||||||
|
|
||||||
# ── OpenRouter premium ──
|
|
||||||
- name: "deepseek-v3-free"
|
|
||||||
routes:
|
|
||||||
- provider: openrouter
|
|
||||||
model: "deepseek/deepseek-chat-v3-0324:free"
|
|
||||||
|
|
||||||
- name: "minimax-m2.5"
|
- name: "minimax-m2.5"
|
||||||
routes:
|
routes:
|
||||||
- provider: openrouter
|
- provider: openrouter
|
||||||
|
|
@ -257,8 +273,3 @@ models:
|
||||||
routes:
|
routes:
|
||||||
- provider: openrouter
|
- provider: openrouter
|
||||||
model: "anthropic/claude-sonnet-4"
|
model: "anthropic/claude-sonnet-4"
|
||||||
|
|
||||||
- name: "trinity-large-preview"
|
|
||||||
routes:
|
|
||||||
- provider: openrouter
|
|
||||||
model: "arcee-ai/trinity-large-preview"
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue