---
# LLM gateway configuration.
# Secrets are injected via ${ENV_VAR} placeholders — presumably expanded by the
# gateway at load time; keep them quoted so YAML treats them as plain strings.

server:
  listen: "0.0.0.0:3000"
  request_timeout: 300s
  max_request_body_mb: 10
  session_secret: "${SESSION_SECRET}"
  default_admin:
    username: "${ADMIN_USERNAME}"
    password: "${ADMIN_PASSWORD}"

# Client API tokens accepted by the gateway, each with its own rate limit
# and daily spend cap.
tokens:
  - name: "open-webui"
    key: "${OPENWEBUI_API_KEY}"
    rate_limit_rpm: 0  # unlimited
    daily_budget_usd: 5.0
  - name: "rayandrew"
    key: "${PERSONAL_API_KEY}"
    rate_limit_rpm: 0  # unlimited
    daily_budget_usd: 10.0

pricing_lookup:
  # url: "https://raw.githubusercontent.com/pydantic/genai-prices/main/prices/data_slim.json"  # default
  refresh_interval: 6h

database:
  path: "/data/gateway.db"
  retention_days: 90

cache:
  enabled: true
  address: "valkey:6379"
  ttl: 3600  # seconds — TODO confirm unit against the gateway's cache config

# Upstream providers. `priority` orders route selection — NOTE(review):
# confirm whether a lower value is preferred.
providers:
  - name: deepinfra
    base_url: "https://api.deepinfra.com/v1/openai"
    api_key: "${DEEPINFRA_API_KEY}"
    priority: 1
    timeout: 120s
  - name: siliconflow
    base_url: "https://api.siliconflow.com/v1"
    api_key: "${SILICONFLOW_API_KEY}"
    priority: 2
    timeout: 120s
  - name: openrouter
    base_url: "https://openrouter.ai/api/v1"
    api_key: "${OPENROUTER_API_KEY}"
    priority: 3
    timeout: 120s
  - name: groq
    base_url: "https://api.groq.com/openai/v1"
    api_key: "${GROQ_API_KEY}"
    priority: 1
    timeout: 120s
  - name: cerebras
    base_url: "https://api.cerebras.ai/v1"
    api_key: "${CEREBRAS_API_KEY}"
    priority: 1
    timeout: 120s

# Exposed model aliases. Each alias maps to one or more provider routes;
# `pricing` is per-route override (USD per 1M tokens, presumably — confirm
# against pricing_lookup units). Routes without `pricing` fall back to the
# pricing_lookup data.
models:
  # ── DeepSeek V3.2 ──
  - name: "deepseek-v3.2"
    routes:
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-V3.2"
        pricing: { input: 0.26, output: 0.38 }
      - provider: siliconflow
        model: "deepseek-ai/DeepSeek-V3.2"
        pricing: { input: 0.27, output: 0.42 }
      - provider: openrouter
        model: "deepseek/deepseek-chat-v3-0324"
        pricing: { input: 0.30, output: 0.88 }

  # ── DeepSeek R1 ──
  - name: "deepseek-r1"
    routes:
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-R1"
        pricing: { input: 0.40, output: 1.60 }
      - provider: openrouter
        model: "deepseek/deepseek-r1"
        pricing: { input: 0.55, output: 2.19 }

  # ── GPT-OSS (OpenAI open-weight MoE) ──
  - name: "gpt-oss"
    routes:
      - provider: deepinfra
        model: "openai/gpt-oss-120b"
        pricing: { input: 0.05, output: 0.24 }

  - name: "gpt-oss-20b"
    routes:
      - provider: deepinfra
        model: "openai/gpt-oss-20b"
        pricing: { input: 0.04, output: 0.16 }

  # ── Nemotron ──
  - name: "nemotron-super"
    routes:
      - provider: deepinfra
        model: "nvidia/Llama-3.3-Nemotron-Super-49B-v1.5"
        pricing: { input: 0.10, output: 0.40 }

  - name: "nemotron-nano"
    routes:
      - provider: deepinfra
        model: "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
        pricing: { input: 0.04, output: 0.16 }

  # ── Devstral ──
  - name: "devstral"
    routes:
      - provider: deepinfra
        model: "mistralai/Devstral-Small-2505"

  # ── GLM ──
  - name: "glm-4.6"
    routes:
      - provider: deepinfra
        model: "zai-org/GLM-4.6"
        pricing: { input: 0.60, output: 1.90 }

  - name: "glm-4.7"
    routes:
      - provider: deepinfra
        model: "zai-org/GLM-4.7"
        pricing: { input: 0.40, output: 1.75 }
      - provider: siliconflow
        model: "THUDM/GLM-4-32B-0414"

  - name: "glm-5"
    routes:
      - provider: deepinfra
        model: "zai-org/GLM-5"
        pricing: { input: 0.80, output: 2.56 }

  # ── Kimi ──
  - name: "kimi-k2"
    routes:
      - provider: deepinfra
        model: "moonshotai/Kimi-K2-Instruct-0905"
        pricing: { input: 0.50, output: 2.00 }
      - provider: siliconflow
        model: "moonshotai/Kimi-K2-Instruct-0905"
        pricing: { input: 0.58, output: 2.29 }

  - name: "kimi-k2.5"
    routes:
      - provider: deepinfra
        model: "moonshotai/Kimi-K2.5"
        pricing: { input: 0.45, output: 2.25 }
      - provider: openrouter
        model: "moonshotai/kimi-k2.5"

  # ── Qwen3 Coder ──
  - name: "qwen3-coder"
    routes:
      - provider: siliconflow
        model: "Qwen/Qwen3-Coder-480B-A35B-Instruct"
        pricing: { input: 1.14, output: 2.28 }

  - name: "qwen3-coder-30b"
    routes:
      - provider: siliconflow
        model: "Qwen/Qwen3-Coder-30B-A3B-Instruct"

  # ── Llama ──
  - name: "llama-3.3-70b"
    routes:
      - provider: groq
        model: "llama-3.3-70b-versatile"
      - provider: cerebras
        model: "llama-3.3-70b"
      - provider: deepinfra
        model: "meta-llama/Llama-3.3-70B-Instruct"
        pricing: { input: 0.23, output: 0.40 }

  - name: "llama-3.1-8b"
    routes:
      - provider: groq
        model: "llama-3.1-8b-instant"
      - provider: cerebras
        model: "llama-3.1-8b"
      - provider: deepinfra
        model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
        pricing: { input: 0.03, output: 0.05 }

  # ── Qwen 2.5 ──
  - name: "qwen-2.5-72b"
    routes:
      - provider: groq
        model: "qwen-2.5-72b"
      - provider: deepinfra
        model: "Qwen/Qwen2.5-72B-Instruct"
        pricing: { input: 0.23, output: 0.40 }

  - name: "qwen-2.5-coder-32b"
    routes:
      - provider: groq
        model: "qwen-2.5-coder-32b"
      - provider: deepinfra
        model: "Qwen/Qwen2.5-Coder-32B-Instruct"
        pricing: { input: 0.07, output: 0.16 }

  # ── Other ──
  - name: "gemma-2-9b"
    routes:
      - provider: groq
        model: "gemma2-9b-it"

  - name: "deepseek-r1-distill-llama-70b"
    routes:
      - provider: groq
        model: "deepseek-r1-distill-llama-70b"
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
        pricing: { input: 0.23, output: 0.69 }

  - name: "deepseek-r1-distill-qwen-32b"
    routes:
      - provider: deepinfra
        model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
        pricing: { input: 0.07, output: 0.16 }

  # ── OpenRouter premium ──
  - name: "deepseek-v3-free"
    routes:
      - provider: openrouter
        model: "deepseek/deepseek-chat-v3-0324:free"

  - name: "minimax-m2.5"
    routes:
      - provider: openrouter
        model: "minimax/minimax-m2.5"

  - name: "gpt-4.1-mini"
    routes:
      - provider: openrouter
        model: "openai/gpt-4.1-mini"

  - name: "gpt-4.1"
    routes:
      - provider: openrouter
        model: "openai/gpt-4.1"

  - name: "gemini-3-flash-preview"
    routes:
      - provider: openrouter
        model: "google/gemini-3-flash-preview"

  - name: "gemini-2.5-pro"
    routes:
      - provider: openrouter
        model: "google/gemini-2.5-pro-preview"

  - name: "claude-sonnet"
    routes:
      - provider: openrouter
        model: "anthropic/claude-sonnet-4"

  - name: "trinity-large-preview"
    routes:
      - provider: openrouter
        model: "arcee-ai/trinity-large-preview"