Compare commits
No commits in common. "d4747d7dfc135ef07103ea9eb3c725f0e9691dcd" and "225671eec49104ea37c84b3875f6baadb7c70caf" have entirely different histories.
d4747d7dfc
...
225671eec4
1 changed files with 69 additions and 88 deletions
157
llm-gateway.yaml
157
llm-gateway.yaml
|
|
@ -59,56 +59,65 @@ providers:
|
||||||
|
|
||||||
models:
|
models:
|
||||||
# ═══ TIER 1: Free (OpenRouter free models, $0) ═══
|
# ═══ TIER 1: Free (OpenRouter free models, $0) ═══
|
||||||
# NOTE: Commented out — free models are heavily rate-limited upstream.
|
- name: "llama-3.3-70b-free"
|
||||||
# Uncomment if you want best-effort free access.
|
routes:
|
||||||
# - name: "llama-3.3-70b-free"
|
- provider: openrouter
|
||||||
# routes:
|
model: "meta-llama/llama-3.3-70b-instruct:free"
|
||||||
# - provider: openrouter
|
|
||||||
# model: "meta-llama/llama-3.3-70b-instruct:free"
|
- name: "deepseek-r1-free"
|
||||||
# - name: "deepseek-r1-free"
|
routes:
|
||||||
# routes:
|
- provider: openrouter
|
||||||
# - provider: openrouter
|
model: "deepseek/deepseek-r1-0528:free"
|
||||||
# model: "deepseek/deepseek-r1-0528:free"
|
|
||||||
# - name: "gpt-oss-free"
|
- name: "gpt-oss-free"
|
||||||
# routes:
|
routes:
|
||||||
# - provider: openrouter
|
- provider: openrouter
|
||||||
# model: "openai/gpt-oss-120b:free"
|
model: "openai/gpt-oss-120b:free"
|
||||||
# - name: "gpt-oss-20b-free"
|
|
||||||
# routes:
|
- name: "gpt-oss-20b-free"
|
||||||
# - provider: openrouter
|
routes:
|
||||||
# model: "openai/gpt-oss-20b:free"
|
- provider: openrouter
|
||||||
# - name: "qwen3-coder-free"
|
model: "openai/gpt-oss-20b:free"
|
||||||
# routes:
|
|
||||||
# - provider: openrouter
|
- name: "qwen3-coder-free"
|
||||||
# model: "qwen/qwen3-coder:free"
|
routes:
|
||||||
# - name: "qwen3-235b-free"
|
- provider: openrouter
|
||||||
# routes:
|
model: "qwen/qwen3-coder:free"
|
||||||
# - provider: openrouter
|
|
||||||
# model: "qwen/qwen3-235b-a22b-thinking-2507"
|
- name: "qwen3-235b-free"
|
||||||
# - name: "glm-4.5-air-free"
|
routes:
|
||||||
# routes:
|
- provider: openrouter
|
||||||
# - provider: openrouter
|
model: "qwen/qwen3-235b-a22b-thinking-2507"
|
||||||
# model: "z-ai/glm-4.5-air:free"
|
|
||||||
# - name: "nemotron-nano-free"
|
- name: "glm-4.5-air-free"
|
||||||
# routes:
|
routes:
|
||||||
# - provider: openrouter
|
- provider: openrouter
|
||||||
# model: "nvidia/nemotron-nano-9b-v2:free"
|
model: "z-ai/glm-4.5-air:free"
|
||||||
# - name: "trinity-large-free"
|
|
||||||
# routes:
|
- name: "nemotron-nano-free"
|
||||||
# - provider: openrouter
|
routes:
|
||||||
# model: "arcee-ai/trinity-large-preview:free"
|
- provider: openrouter
|
||||||
# - name: "mistral-small-free"
|
model: "nvidia/nemotron-nano-9b-v2:free"
|
||||||
# routes:
|
|
||||||
# - provider: openrouter
|
- name: "trinity-large-free"
|
||||||
# model: "mistralai/mistral-small-3.1-24b-instruct:free"
|
routes:
|
||||||
# - name: "gemma-3-27b-free"
|
- provider: openrouter
|
||||||
# routes:
|
model: "arcee-ai/trinity-large-preview:free"
|
||||||
# - provider: openrouter
|
|
||||||
# model: "google/gemma-3-27b-it:free"
|
- name: "mistral-small-free"
|
||||||
# - name: "step-3.5-flash-free"
|
routes:
|
||||||
# routes:
|
- provider: openrouter
|
||||||
# - provider: openrouter
|
model: "mistralai/mistral-small-3.1-24b-instruct:free"
|
||||||
# model: "stepfun/step-3.5-flash:free"
|
|
||||||
|
- name: "gemma-3-27b-free"
|
||||||
|
routes:
|
||||||
|
- provider: openrouter
|
||||||
|
model: "google/gemma-3-27b-it:free"
|
||||||
|
|
||||||
|
- name: "step-3.5-flash-free"
|
||||||
|
routes:
|
||||||
|
- provider: openrouter
|
||||||
|
model: "stepfun/step-3.5-flash:free"
|
||||||
|
|
||||||
# ═══ TIER 2: Low cost (Groq, Cerebras — free tier with rate limits) ═══
|
# ═══ TIER 2: Low cost (Groq, Cerebras — free tier with rate limits) ═══
|
||||||
- name: "llama-3.3-70b"
|
- name: "llama-3.3-70b"
|
||||||
|
|
@ -117,11 +126,8 @@ models:
|
||||||
model: "llama-3.3-70b-versatile"
|
model: "llama-3.3-70b-versatile"
|
||||||
pricing: { input: 0.59, output: 0.79 }
|
pricing: { input: 0.59, output: 0.79 }
|
||||||
- provider: deepinfra
|
- provider: deepinfra
|
||||||
model: "meta-llama/Llama-3.3-70B-Instruct-Turbo"
|
model: "meta-llama/Llama-3.3-70B-Instruct"
|
||||||
pricing: { input: 0.23, output: 0.40 }
|
pricing: { input: 0.23, output: 0.40 }
|
||||||
- provider: cerebras
|
|
||||||
model: "llama-3.3-70b"
|
|
||||||
pricing: { input: 0.85, output: 1.20 }
|
|
||||||
|
|
||||||
- name: "llama-3.1-8b"
|
- name: "llama-3.1-8b"
|
||||||
routes:
|
routes:
|
||||||
|
|
@ -156,26 +162,6 @@ models:
|
||||||
model: "openai/gpt-oss-20b"
|
model: "openai/gpt-oss-20b"
|
||||||
pricing: { input: 0.04, output: 0.16 }
|
pricing: { input: 0.04, output: 0.16 }
|
||||||
|
|
||||||
- name: "llama-4-scout"
|
|
||||||
routes:
|
|
||||||
- provider: groq
|
|
||||||
model: "meta-llama/llama-4-scout-17b-16e-instruct"
|
|
||||||
pricing: { input: 0.11, output: 0.34 }
|
|
||||||
|
|
||||||
- name: "llama-4-maverick"
|
|
||||||
routes:
|
|
||||||
- provider: groq
|
|
||||||
model: "meta-llama/llama-4-maverick-17b-128e-instruct"
|
|
||||||
pricing: { input: 0.20, output: 0.60 }
|
|
||||||
|
|
||||||
- name: "qwen3-32b"
|
|
||||||
routes:
|
|
||||||
- provider: groq
|
|
||||||
model: "qwen/qwen3-32b"
|
|
||||||
pricing: { input: 0.29, output: 0.59 }
|
|
||||||
- provider: cerebras
|
|
||||||
model: "qwen-3-32b"
|
|
||||||
|
|
||||||
# ═══ TIER 3: DeepSeek V3.2 (cheapest flagship) ═══
|
# ═══ TIER 3: DeepSeek V3.2 (cheapest flagship) ═══
|
||||||
- name: "deepseek-v3.2"
|
- name: "deepseek-v3.2"
|
||||||
routes:
|
routes:
|
||||||
|
|
@ -206,24 +192,22 @@ models:
|
||||||
- name: "deepseek-r1"
|
- name: "deepseek-r1"
|
||||||
routes:
|
routes:
|
||||||
- provider: deepinfra
|
- provider: deepinfra
|
||||||
model: "deepseek-ai/DeepSeek-R1-0528"
|
model: "deepseek-ai/DeepSeek-R1"
|
||||||
|
pricing: { input: 0.40, output: 1.60 }
|
||||||
- provider: openrouter
|
- provider: openrouter
|
||||||
model: "deepseek/deepseek-r1"
|
model: "deepseek/deepseek-r1"
|
||||||
|
pricing: { input: 0.55, output: 2.19 }
|
||||||
|
|
||||||
- name: "deepseek-r1-distill-llama-70b"
|
- name: "deepseek-r1-distill-qwen-32b"
|
||||||
routes:
|
routes:
|
||||||
- provider: deepinfra
|
- provider: deepinfra
|
||||||
model: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
|
model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
|
||||||
|
pricing: { input: 0.07, output: 0.16 }
|
||||||
|
|
||||||
- name: "devstral-small"
|
- name: "devstral"
|
||||||
routes:
|
routes:
|
||||||
- provider: openrouter
|
- provider: deepinfra
|
||||||
model: "mistralai/devstral-small"
|
model: "mistralai/Devstral-Small-2505"
|
||||||
|
|
||||||
- name: "devstral-medium"
|
|
||||||
routes:
|
|
||||||
- provider: openrouter
|
|
||||||
model: "mistralai/devstral-medium"
|
|
||||||
|
|
||||||
# ═══ TIER 6: GLM ═══
|
# ═══ TIER 6: GLM ═══
|
||||||
- name: "glm-4.6"
|
- name: "glm-4.6"
|
||||||
|
|
@ -252,9 +236,6 @@ models:
|
||||||
# ═══ TIER 7: Kimi ═══
|
# ═══ TIER 7: Kimi ═══
|
||||||
- name: "kimi-k2"
|
- name: "kimi-k2"
|
||||||
routes:
|
routes:
|
||||||
- provider: groq
|
|
||||||
model: "moonshotai/kimi-k2-instruct-0905"
|
|
||||||
pricing: { input: 1.00, output: 3.00 }
|
|
||||||
- provider: deepinfra
|
- provider: deepinfra
|
||||||
model: "moonshotai/Kimi-K2-Instruct-0905"
|
model: "moonshotai/Kimi-K2-Instruct-0905"
|
||||||
pricing: { input: 0.50, output: 2.00 }
|
pricing: { input: 0.50, output: 2.00 }
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue