feat: replace LiteLLM with new-api for LLM proxy and add monitoring stack
This commit is contained in:
parent
718cf928f3
commit
964f9e43cd
6 changed files with 447 additions and 41 deletions
11
.env.example
11
.env.example
|
|
@ -4,9 +4,9 @@
|
||||||
# cp .env.example .env
|
# cp .env.example .env
|
||||||
# ============================================
|
# ============================================
|
||||||
|
|
||||||
# --- LiteLLM ---
|
# --- new-api (LLM proxy) ---
|
||||||
LITELLM_MASTER_KEY=sk-change-me-to-a-random-string
|
# Admin access token for new-api management API (also used by init-channels.sh)
|
||||||
LITELLM_DB_PASSWORD=change-me-to-a-random-string
|
NEW_API_ACCESS_TOKEN=change-me-to-a-random-string
|
||||||
OPENROUTER_API_KEY=sk-or-...
|
OPENROUTER_API_KEY=sk-or-...
|
||||||
SILICONFLOW_API_KEY=sk-...
|
SILICONFLOW_API_KEY=sk-...
|
||||||
DEEPINFRA_API_KEY=...
|
DEEPINFRA_API_KEY=...
|
||||||
|
|
@ -14,9 +14,12 @@ GROQ_API_KEY=gsk_...
|
||||||
CEREBRAS_API_KEY=...
|
CEREBRAS_API_KEY=...
|
||||||
|
|
||||||
# --- Open WebUI ---
|
# --- Open WebUI ---
|
||||||
# Virtual key from LiteLLM (create in LiteLLM UI → Virtual Keys)
|
# API token created in new-api (or via init-channels.sh)
|
||||||
OPENWEBUI_API_KEY=sk-...
|
OPENWEBUI_API_KEY=sk-...
|
||||||
|
|
||||||
|
# --- Grafana ---
|
||||||
|
GRAFANA_ADMIN_PASSWORD=change-me-to-a-secure-password
|
||||||
|
|
||||||
# --- Cloudflare Tunnel ---
|
# --- Cloudflare Tunnel ---
|
||||||
# Create a tunnel in Cloudflare Zero Trust dashboard → Networks → Tunnels
|
# Create a tunnel in Cloudflare Zero Trust dashboard → Networks → Tunnels
|
||||||
# Copy the token from the tunnel install command
|
# Copy the token from the tunnel install command
|
||||||
|
|
|
||||||
|
|
@ -38,48 +38,68 @@ services:
|
||||||
- ANONYMIZED_TELEMETRY=FALSE
|
- ANONYMIZED_TELEMETRY=FALSE
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
# ── Database for LiteLLM ──
|
# # ── Database for LiteLLM (DEPRECATED — kept for rollback) ──
|
||||||
litellm-db:
|
# litellm-db:
|
||||||
image: postgres:16-alpine
|
# image: postgres:16-alpine
|
||||||
volumes:
|
# volumes:
|
||||||
- litellm-db-data:/var/lib/postgresql/data
|
# - litellm-db-data:/var/lib/postgresql/data
|
||||||
environment:
|
# environment:
|
||||||
- POSTGRES_DB=litellm
|
# - POSTGRES_DB=litellm
|
||||||
- POSTGRES_USER=litellm
|
# - POSTGRES_USER=litellm
|
||||||
- POSTGRES_PASSWORD=${LITELLM_DB_PASSWORD}
|
# - POSTGRES_PASSWORD=${LITELLM_DB_PASSWORD}
|
||||||
restart: unless-stopped
|
# restart: unless-stopped
|
||||||
healthcheck:
|
# healthcheck:
|
||||||
test: ["CMD-SHELL", "pg_isready -U litellm"]
|
# test: ["CMD-SHELL", "pg_isready -U litellm"]
|
||||||
interval: 10s
|
# interval: 10s
|
||||||
timeout: 3s
|
# timeout: 3s
|
||||||
retries: 3
|
# retries: 3
|
||||||
|
|
||||||
# ── LLM API proxy ──
|
# # ── LLM API proxy (DEPRECATED — replaced by new-api) ──
|
||||||
litellm:
|
# litellm:
|
||||||
image: ghcr.io/berriai/litellm:main-latest
|
# image: ghcr.io/berriai/litellm:main-latest
|
||||||
command: ["--config", "/app/config.yaml", "--port", "4000"]
|
# command: ["--config", "/app/config.yaml", "--port", "4000"]
|
||||||
volumes:
|
# volumes:
|
||||||
- ./litellm/config.yaml:/app/config.yaml:ro
|
# - ./litellm/config.yaml:/app/config.yaml:ro
|
||||||
|
# ports:
|
||||||
|
# - "0.0.0.0:4000:4000"
|
||||||
|
# environment:
|
||||||
|
# - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY}
|
||||||
|
# - DATABASE_URL=postgresql://litellm:${LITELLM_DB_PASSWORD}@litellm-db:5432/litellm
|
||||||
|
# - OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
|
||||||
|
# - SILICONFLOW_API_KEY=${SILICONFLOW_API_KEY}
|
||||||
|
# - DEEPINFRA_API_KEY=${DEEPINFRA_API_KEY}
|
||||||
|
# - GROQ_API_KEY=${GROQ_API_KEY}
|
||||||
|
# - CEREBRAS_API_KEY=${CEREBRAS_API_KEY}
|
||||||
|
# depends_on:
|
||||||
|
# litellm-db:
|
||||||
|
# condition: service_healthy
|
||||||
|
# restart: unless-stopped
|
||||||
|
# healthcheck:
|
||||||
|
# test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"]
|
||||||
|
# interval: 15s
|
||||||
|
# timeout: 5s
|
||||||
|
# retries: 5
|
||||||
|
# start_period: 30s
|
||||||
|
|
||||||
|
# ── LLM API proxy (new-api) ──
|
||||||
|
new-api:
|
||||||
|
image: calciumion/new-api:latest
|
||||||
ports:
|
ports:
|
||||||
- "0.0.0.0:4000:4000"
|
- "0.0.0.0:4000:3000"
|
||||||
|
volumes:
|
||||||
|
- new-api-data:/data
|
||||||
environment:
|
environment:
|
||||||
- LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY}
|
- SQL_DSN=
|
||||||
- DATABASE_URL=postgresql://litellm:${LITELLM_DB_PASSWORD}@litellm-db:5432/litellm
|
- TZ=UTC
|
||||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
|
- ENABLE_METRIC=true
|
||||||
- SILICONFLOW_API_KEY=${SILICONFLOW_API_KEY}
|
- INITIAL_ROOT_ACCESS_TOKEN=${NEW_API_ACCESS_TOKEN}
|
||||||
- DEEPINFRA_API_KEY=${DEEPINFRA_API_KEY}
|
|
||||||
- GROQ_API_KEY=${GROQ_API_KEY}
|
|
||||||
- CEREBRAS_API_KEY=${CEREBRAS_API_KEY}
|
|
||||||
depends_on:
|
|
||||||
litellm-db:
|
|
||||||
condition: service_healthy
|
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"]
|
test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/api/status"]
|
||||||
interval: 15s
|
interval: 15s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
start_period: 30s
|
start_period: 10s
|
||||||
|
|
||||||
# ── Chat UI ──
|
# ── Chat UI ──
|
||||||
open-webui:
|
open-webui:
|
||||||
|
|
@ -90,7 +110,7 @@ services:
|
||||||
- "0.0.0.0:3000:8080"
|
- "0.0.0.0:3000:8080"
|
||||||
environment:
|
environment:
|
||||||
- OLLAMA_BASE_URL=
|
- OLLAMA_BASE_URL=
|
||||||
- OPENAI_API_BASE_URL=http://litellm:4000/v1
|
- OPENAI_API_BASE_URL=http://new-api:3000/v1
|
||||||
- OPENAI_API_KEY=${OPENWEBUI_API_KEY}
|
- OPENAI_API_KEY=${OPENWEBUI_API_KEY}
|
||||||
- ENABLE_RAG_WEB_SEARCH=true
|
- ENABLE_RAG_WEB_SEARCH=true
|
||||||
- RAG_WEB_SEARCH_ENGINE=searxng
|
- RAG_WEB_SEARCH_ENGINE=searxng
|
||||||
|
|
@ -99,7 +119,7 @@ services:
|
||||||
- CHROMA_HTTP_PORT=8000
|
- CHROMA_HTTP_PORT=8000
|
||||||
- WEBUI_AUTH=true
|
- WEBUI_AUTH=true
|
||||||
depends_on:
|
depends_on:
|
||||||
litellm:
|
new-api:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
|
@ -128,9 +148,69 @@ services:
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
network_mode: host
|
network_mode: host
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════
|
||||||
|
# Monitoring stack
|
||||||
|
# ═══════════════════════════════════════════════
|
||||||
|
|
||||||
|
# ── Metrics store (Prometheus-compatible) ──
|
||||||
|
victoriametrics:
|
||||||
|
image: victoriametrics/victoria-metrics:latest
|
||||||
|
volumes:
|
||||||
|
- victoriametrics-data:/victoria-metrics-data
|
||||||
|
- ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
||||||
|
command:
|
||||||
|
- "-promscrape.config=/etc/prometheus/prometheus.yml"
|
||||||
|
- "-retentionPeriod=90d"
|
||||||
|
- "-storageDataPath=/victoria-metrics-data"
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:8428:8428"
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# ── Dashboards ──
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana:latest
|
||||||
|
volumes:
|
||||||
|
- grafana-data:/var/lib/grafana
|
||||||
|
ports:
|
||||||
|
- "0.0.0.0:3001:3000"
|
||||||
|
environment:
|
||||||
|
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD}
|
||||||
|
- GF_USERS_ALLOW_SIGN_UP=false
|
||||||
|
depends_on:
|
||||||
|
- victoriametrics
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# ── Host system metrics ──
|
||||||
|
node-exporter:
|
||||||
|
image: prom/node-exporter:latest
|
||||||
|
pid: host
|
||||||
|
volumes:
|
||||||
|
- /proc:/host/proc:ro
|
||||||
|
- /sys:/host/sys:ro
|
||||||
|
- /:/rootfs:ro
|
||||||
|
command:
|
||||||
|
- "--path.procfs=/host/proc"
|
||||||
|
- "--path.sysfs=/host/sys"
|
||||||
|
- "--path.rootfs=/rootfs"
|
||||||
|
- "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# ── Valkey/Redis metrics ──
|
||||||
|
redis-exporter:
|
||||||
|
image: oliver006/redis_exporter:latest
|
||||||
|
environment:
|
||||||
|
- REDIS_ADDR=redis://valkey:6379
|
||||||
|
depends_on:
|
||||||
|
valkey:
|
||||||
|
condition: service_healthy
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
valkey-data:
|
valkey-data:
|
||||||
chromadb-data:
|
chromadb-data:
|
||||||
litellm-db-data:
|
litellm-db-data:
|
||||||
|
new-api-data:
|
||||||
open-webui-data:
|
open-webui-data:
|
||||||
tailscale-state:
|
tailscale-state:
|
||||||
|
victoriametrics-data:
|
||||||
|
grafana-data:
|
||||||
|
|
|
||||||
15
monitoring/prometheus.yml
Normal file
15
monitoring/prometheus.yml
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
global:
|
||||||
|
scrape_interval: 30s
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: 'new-api'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['new-api:3000']
|
||||||
|
|
||||||
|
- job_name: 'node'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['node-exporter:9100']
|
||||||
|
|
||||||
|
- job_name: 'valkey'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['redis-exporter:9121']
|
||||||
147
new-api/CHANNELS.md
Normal file
147
new-api/CHANNELS.md
Normal file
|
|
@ -0,0 +1,147 @@
|
||||||
|
# new-api Channel Configuration
|
||||||
|
|
||||||
|
After first start, access the new-api web UI at `http://<server>:4000` to configure channels.
|
||||||
|
|
||||||
|
Default admin credentials: `root` / `123456` — **change immediately**.
|
||||||
|
|
||||||
|
## API Token for Open WebUI
|
||||||
|
|
||||||
|
Create an API token in new-api's token management. Use this token as `OPENWEBUI_API_KEY` in `.env`.
|
||||||
|
|
||||||
|
## Channels to Create
|
||||||
|
|
||||||
|
Configure each channel via **Channels > Add Channel** in the web UI.
|
||||||
|
|
||||||
|
### 1. DeepInfra (Priority 1)
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|---|---|
|
||||||
|
| Name | DeepInfra |
|
||||||
|
| Type | OpenAI |
|
||||||
|
| Base URL | `https://api.deepinfra.com/v1/openai` |
|
||||||
|
| Key | `$DEEPINFRA_API_KEY` |
|
||||||
|
| Priority | 1 |
|
||||||
|
| Models | See model mapping below |
|
||||||
|
|
||||||
|
### 2. SiliconFlow (Priority 2)
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|---|---|
|
||||||
|
| Name | SiliconFlow |
|
||||||
|
| Type | OpenAI |
|
||||||
|
| Base URL | `https://api.siliconflow.com/v1` |
|
||||||
|
| Key | `$SILICONFLOW_API_KEY` |
|
||||||
|
| Priority | 2 |
|
||||||
|
| Models | See model mapping below |
|
||||||
|
|
||||||
|
### 3. OpenRouter (Priority 3)
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|---|---|
|
||||||
|
| Name | OpenRouter |
|
||||||
|
| Type | OpenAI |
|
||||||
|
| Base URL | `https://openrouter.ai/api/v1` |
|
||||||
|
| Key | `$OPENROUTER_API_KEY` |
|
||||||
|
| Priority | 3 |
|
||||||
|
| Models | See model mapping below |
|
||||||
|
|
||||||
|
### 4. Groq (Priority 1)
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|---|---|
|
||||||
|
| Name | Groq |
|
||||||
|
| Type | OpenAI |
|
||||||
|
| Base URL | `https://api.groq.com/openai/v1` |
|
||||||
|
| Key | `$GROQ_API_KEY` |
|
||||||
|
| Priority | 1 |
|
||||||
|
| Models | `llama-3.3-70b` |
|
||||||
|
|
||||||
|
### 5. Cerebras (Priority 1)
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|---|---|
|
||||||
|
| Name | Cerebras |
|
||||||
|
| Type | OpenAI |
|
||||||
|
| Base URL | `https://api.cerebras.ai/v1` |
|
||||||
|
| Key | `$CEREBRAS_API_KEY` |
|
||||||
|
| Priority | 1 |
|
||||||
|
| Models | `llama-3.3-70b-cerebras` |
|
||||||
|
|
||||||
|
## Model Mapping per Channel
|
||||||
|
|
||||||
|
new-api uses model aliasing: the "model name" is what clients see, the "actual model" is what's sent to the provider.
|
||||||
|
|
||||||
|
### DeepInfra Models
|
||||||
|
|
||||||
|
| Client Model Name | Actual Provider Model |
|
||||||
|
|---|---|
|
||||||
|
| `deepseek-v3.2` | `deepseek-ai/DeepSeek-V3.2` |
|
||||||
|
| `deepseek-r1` | `deepseek-ai/DeepSeek-R1` |
|
||||||
|
| `gpt-oss` | `openai/gpt-oss-120b` |
|
||||||
|
| `gpt-oss-20b` | `openai/gpt-oss-20b` |
|
||||||
|
| `nemotron-super` | `nvidia/Llama-3.3-Nemotron-Super-49B-v1.5` |
|
||||||
|
| `nemotron-nano` | `nvidia/NVIDIA-Nemotron-Nano-9B-v2` |
|
||||||
|
| `devstral` | `mistralai/Devstral-Small-2505` |
|
||||||
|
| `glm-4.6` | `zai-org/GLM-4.6` |
|
||||||
|
| `glm-4.7` | `zai-org/GLM-4.7` |
|
||||||
|
| `glm-5` | `zai-org/GLM-5` |
|
||||||
|
| `kimi-k2` | `moonshotai/Kimi-K2-Instruct-0905` |
|
||||||
|
| `kimi-k2.5` | `moonshotai/Kimi-K2.5` |
|
||||||
|
| `deepseek-v3-free` | `deepseek-ai/DeepSeek-V3` |
|
||||||
|
|
||||||
|
### SiliconFlow Models
|
||||||
|
|
||||||
|
| Client Model Name | Actual Provider Model |
|
||||||
|
|---|---|
|
||||||
|
| `deepseek-v3.2` | `deepseek-ai/DeepSeek-V3.2` |
|
||||||
|
| `glm-4.7` | `THUDM/GLM-4-32B-0414` |
|
||||||
|
| `kimi-k2` | `moonshotai/Kimi-K2-Instruct-0905` |
|
||||||
|
| `qwen3-coder` | `Qwen/Qwen3-Coder-480B-A35B-Instruct` |
|
||||||
|
| `qwen3-coder-30b` | `Qwen/Qwen3-Coder-30B-A3B-Instruct` |
|
||||||
|
|
||||||
|
### OpenRouter Models
|
||||||
|
|
||||||
|
| Client Model Name | Actual Provider Model |
|
||||||
|
|---|---|
|
||||||
|
| `deepseek-v3.2` | `deepseek/deepseek-chat-v3-0324` |
|
||||||
|
| `deepseek-v3-free` | `deepseek/deepseek-chat-v3-0324:free` |
|
||||||
|
| `kimi-k2.5` | `moonshotai/kimi-k2.5` |
|
||||||
|
| `minimax-m2.5` | `minimax/minimax-m2.5` |
|
||||||
|
| `gpt-4.1-mini` | `openai/gpt-4.1-mini` |
|
||||||
|
| `gpt-4.1` | `openai/gpt-4.1` |
|
||||||
|
| `gemini-3-flash-preview` | `google/gemini-3-flash-preview` |
|
||||||
|
| `gemini-2.5-pro` | `google/gemini-2.5-pro-preview` |
|
||||||
|
| `claude-sonnet` | `anthropic/claude-sonnet-4` |
|
||||||
|
| `trinity-large-preview` | `arcee-ai/trinity-large-preview` |
|
||||||
|
|
||||||
|
### Groq Models
|
||||||
|
|
||||||
|
| Client Model Name | Actual Provider Model |
|
||||||
|
|---|---|
|
||||||
|
| `llama-3.3-70b` | `llama-3.3-70b-versatile` |
|
||||||
|
|
||||||
|
### Cerebras Models
|
||||||
|
|
||||||
|
| Client Model Name | Actual Provider Model |
|
||||||
|
|---|---|
|
||||||
|
| `llama-3.3-70b-cerebras` | `llama-3.3-70b` |
|
||||||
|
|
||||||
|
## Fallback Behavior
|
||||||
|
|
||||||
|
new-api handles fallbacks via priority levels:
|
||||||
|
- When a model exists on multiple channels, the highest priority (lowest number) channel is tried first
|
||||||
|
- If it fails, it automatically falls back to the next priority level
|
||||||
|
|
||||||
|
For example, `deepseek-v3.2` exists on:
|
||||||
|
1. DeepInfra (priority 1) — tried first
|
||||||
|
2. SiliconFlow (priority 2) — fallback
|
||||||
|
3. OpenRouter (priority 3) — last resort
|
||||||
|
|
||||||
|
## Grafana Setup
|
||||||
|
|
||||||
|
After first start, access Grafana at `http://<server>:3001`:
|
||||||
|
1. Login with `admin` / `$GRAFANA_ADMIN_PASSWORD`
|
||||||
|
2. Add data source: **Prometheus** with URL `http://victoriametrics:8428`
|
||||||
|
3. Import dashboards:
|
||||||
|
- Node Exporter Full: dashboard ID `1860`
|
||||||
|
- Redis: dashboard ID `763`
|
||||||
161
new-api/init-channels.sh
Executable file
161
new-api/init-channels.sh
Executable file
|
|
@ -0,0 +1,161 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# Configures new-api channels and token via the admin API.
|
||||||
|
# Run once after first boot: ./new-api/init-channels.sh
|
||||||
|
#
|
||||||
|
# Requires these env vars (or .env file in project root):
|
||||||
|
# NEW_API_ACCESS_TOKEN - admin access token (set via INITIAL_ROOT_ACCESS_TOKEN)
|
||||||
|
# DEEPINFRA_API_KEY
|
||||||
|
# SILICONFLOW_API_KEY
|
||||||
|
# OPENROUTER_API_KEY
|
||||||
|
# GROQ_API_KEY
|
||||||
|
# CEREBRAS_API_KEY
|
||||||
|
# OPENWEBUI_API_KEY - token for Open WebUI to authenticate with new-api
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||||
|
ENV_FILE="${SCRIPT_DIR}/../.env"
|
||||||
|
|
||||||
|
# Load .env if present
|
||||||
|
if [[ -f "$ENV_FILE" ]]; then
|
||||||
|
set -a
|
||||||
|
# shellcheck disable=SC1090
|
||||||
|
source "$ENV_FILE"
|
||||||
|
set +a
|
||||||
|
fi
|
||||||
|
|
||||||
|
API_BASE="${NEW_API_BASE:-http://localhost:4000}"
|
||||||
|
TOKEN="${NEW_API_ACCESS_TOKEN:?Set NEW_API_ACCESS_TOKEN (from INITIAL_ROOT_ACCESS_TOKEN)}"
|
||||||
|
|
||||||
|
# ── Helper ──────────────────────────────────────────────
|
||||||
|
create_channel() {
|
||||||
|
local name="$1" type="$2" key="$3" base_url="$4" priority="$5" models="$6" model_mapping="$7"
|
||||||
|
|
||||||
|
echo "Creating channel: ${name} (priority ${priority})..."
|
||||||
|
|
||||||
|
local payload
|
||||||
|
payload=$(python3 -c "
|
||||||
|
import json, sys
|
||||||
|
print(json.dumps({
|
||||||
|
'type': int(sys.argv[1]),
|
||||||
|
'name': sys.argv[2],
|
||||||
|
'key': sys.argv[3],
|
||||||
|
'base_url': sys.argv[4],
|
||||||
|
'models': sys.argv[5],
|
||||||
|
'model_mapping': sys.argv[6],
|
||||||
|
'priority': int(sys.argv[7]),
|
||||||
|
'status': 1,
|
||||||
|
'group': 'default',
|
||||||
|
'weight': 1,
|
||||||
|
'auto_ban': 1
|
||||||
|
}))
|
||||||
|
" "$type" "$name" "$key" "$base_url" "$models" "$model_mapping" "$priority")
|
||||||
|
|
||||||
|
local resp http_code body
|
||||||
|
resp=$(curl -s -w "\n%{http_code}" \
|
||||||
|
"${API_BASE}/api/channel/" \
|
||||||
|
-H "Authorization: Bearer ${TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d "$payload")
|
||||||
|
|
||||||
|
http_code=$(echo "$resp" | tail -1)
|
||||||
|
body=$(echo "$resp" | sed '$d')
|
||||||
|
|
||||||
|
if [[ "$http_code" == "200" ]]; then
|
||||||
|
echo " OK"
|
||||||
|
else
|
||||||
|
echo " FAILED (HTTP ${http_code})"
|
||||||
|
echo " ${body}" | head -c 500
|
||||||
|
echo
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Wait for new-api to be ready
|
||||||
|
echo "Waiting for new-api at ${API_BASE}..."
|
||||||
|
for i in $(seq 1 30); do
|
||||||
|
if curl -sf "${API_BASE}/api/status" > /dev/null 2>&1; then
|
||||||
|
echo "new-api is ready."
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
if [[ "$i" == "30" ]]; then
|
||||||
|
echo "ERROR: new-api did not become ready in time."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sleep 2
|
||||||
|
done
|
||||||
|
|
||||||
|
# ── Channel: DeepInfra (priority 1) ────────────────────
|
||||||
|
create_channel "DeepInfra" 1 \
|
||||||
|
"${DEEPINFRA_API_KEY:?}" \
|
||||||
|
"https://api.deepinfra.com/v1/openai" \
|
||||||
|
1 \
|
||||||
|
"deepseek-v3.2,deepseek-r1,gpt-oss,gpt-oss-20b,nemotron-super,nemotron-nano,devstral,glm-4.6,glm-4.7,glm-5,kimi-k2,kimi-k2.5,deepseek-v3-free" \
|
||||||
|
'{"deepseek-v3.2":"deepseek-ai/DeepSeek-V3.2","deepseek-r1":"deepseek-ai/DeepSeek-R1","gpt-oss":"openai/gpt-oss-120b","gpt-oss-20b":"openai/gpt-oss-20b","nemotron-super":"nvidia/Llama-3.3-Nemotron-Super-49B-v1.5","nemotron-nano":"nvidia/NVIDIA-Nemotron-Nano-9B-v2","devstral":"mistralai/Devstral-Small-2505","glm-4.6":"zai-org/GLM-4.6","glm-4.7":"zai-org/GLM-4.7","glm-5":"zai-org/GLM-5","kimi-k2":"moonshotai/Kimi-K2-Instruct-0905","kimi-k2.5":"moonshotai/Kimi-K2.5","deepseek-v3-free":"deepseek-ai/DeepSeek-V3"}'
|
||||||
|
|
||||||
|
# ── Channel: SiliconFlow (priority 2) ──────────────────
|
||||||
|
create_channel "SiliconFlow" 1 \
|
||||||
|
"${SILICONFLOW_API_KEY:?}" \
|
||||||
|
"https://api.siliconflow.com/v1" \
|
||||||
|
2 \
|
||||||
|
"deepseek-v3.2,glm-4.7,kimi-k2,qwen3-coder,qwen3-coder-30b" \
|
||||||
|
'{"deepseek-v3.2":"deepseek-ai/DeepSeek-V3.2","glm-4.7":"THUDM/GLM-4-32B-0414","kimi-k2":"moonshotai/Kimi-K2-Instruct-0905","qwen3-coder":"Qwen/Qwen3-Coder-480B-A35B-Instruct","qwen3-coder-30b":"Qwen/Qwen3-Coder-30B-A3B-Instruct"}'
|
||||||
|
|
||||||
|
# ── Channel: OpenRouter (priority 3) ───────────────────
|
||||||
|
create_channel "OpenRouter" 1 \
|
||||||
|
"${OPENROUTER_API_KEY:?}" \
|
||||||
|
"https://openrouter.ai/api/v1" \
|
||||||
|
3 \
|
||||||
|
"deepseek-v3.2,deepseek-v3-free,kimi-k2.5,minimax-m2.5,gpt-4.1-mini,gpt-4.1,gemini-3-flash-preview,gemini-2.5-pro,claude-sonnet,trinity-large-preview" \
|
||||||
|
'{"deepseek-v3.2":"deepseek/deepseek-chat-v3-0324","deepseek-v3-free":"deepseek/deepseek-chat-v3-0324:free","kimi-k2.5":"moonshotai/kimi-k2.5","minimax-m2.5":"minimax/minimax-m2.5","gpt-4.1-mini":"openai/gpt-4.1-mini","gpt-4.1":"openai/gpt-4.1","gemini-3-flash-preview":"google/gemini-3-flash-preview","gemini-2.5-pro":"google/gemini-2.5-pro-preview","claude-sonnet":"anthropic/claude-sonnet-4","trinity-large-preview":"arcee-ai/trinity-large-preview"}'
|
||||||
|
|
||||||
|
# ── Channel: Groq (priority 1) ─────────────────────────
|
||||||
|
create_channel "Groq" 1 \
|
||||||
|
"${GROQ_API_KEY:?}" \
|
||||||
|
"https://api.groq.com/openai/v1" \
|
||||||
|
1 \
|
||||||
|
"llama-3.3-70b" \
|
||||||
|
'{"llama-3.3-70b":"llama-3.3-70b-versatile"}'
|
||||||
|
|
||||||
|
# ── Channel: Cerebras (priority 1) ─────────────────────
|
||||||
|
create_channel "Cerebras" 1 \
|
||||||
|
"${CEREBRAS_API_KEY:?}" \
|
||||||
|
"https://api.cerebras.ai/v1" \
|
||||||
|
1 \
|
||||||
|
"llama-3.3-70b-cerebras" \
|
||||||
|
'{"llama-3.3-70b-cerebras":"llama-3.3-70b"}'
|
||||||
|
|
||||||
|
# ── Create API token for Open WebUI ────────────────────
|
||||||
|
if [[ -n "${OPENWEBUI_API_KEY:-}" ]]; then
|
||||||
|
echo ""
|
||||||
|
echo "Creating API token for Open WebUI..."
|
||||||
|
TOKEN_RESP=$(curl -s "${API_BASE}/api/token/" \
|
||||||
|
-H "Authorization: Bearer ${TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d "$(python3 -c "
|
||||||
|
import json
|
||||||
|
print(json.dumps({
|
||||||
|
'name': 'open-webui',
|
||||||
|
'remain_quota': 0,
|
||||||
|
'unlimited_quota': True
|
||||||
|
}))
|
||||||
|
")")
|
||||||
|
echo "Token response: ${TOKEN_RESP}" | head -c 500
|
||||||
|
echo ""
|
||||||
|
echo ""
|
||||||
|
echo "NOTE: Use the token 'key' from the response above as OPENAI_API_KEY in Open WebUI."
|
||||||
|
echo " Or create a token manually in the new-api UI."
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "══════════════════════════════════════"
|
||||||
|
echo "Channel setup complete!"
|
||||||
|
echo ""
|
||||||
|
echo "Next steps:"
|
||||||
|
echo " 1. Verify channels at ${API_BASE} (login: root / 123456 — CHANGE THIS)"
|
||||||
|
echo " 2. Test a model:"
|
||||||
|
echo " curl ${API_BASE}/v1/chat/completions \\"
|
||||||
|
echo " -H 'Authorization: Bearer <token>' \\"
|
||||||
|
echo " -H 'Content-Type: application/json' \\"
|
||||||
|
echo " -d '{\"model\":\"deepseek-v3.2\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}'"
|
||||||
|
echo " 3. Check Open WebUI can see models"
|
||||||
|
echo "══════════════════════════════════════"
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
use_default_settings: true
|
use_default_settings: true
|
||||||
|
|
||||||
general:
|
general:
|
||||||
instance_name: "SearXNG"
|
instance_name: "SearXNG"
|
||||||
privacypolicy_url: false
|
privacypolicy_url: false
|
||||||
donation_url: false
|
donation_url: false
|
||||||
enable_metrics: false
|
enable_metrics: false
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue