diff --git a/.env.example b/.env.example index b1e57f0..ecbc8c3 100644 --- a/.env.example +++ b/.env.example @@ -4,9 +4,9 @@ # cp .env.example .env # ============================================ -# --- LiteLLM --- -LITELLM_MASTER_KEY=sk-change-me-to-a-random-string -LITELLM_DB_PASSWORD=change-me-to-a-random-string +# --- new-api (LLM proxy) --- +# Admin access token for new-api management API (also used by init-channels.sh) +NEW_API_ACCESS_TOKEN=change-me-to-a-random-string OPENROUTER_API_KEY=sk-or-... SILICONFLOW_API_KEY=sk-... DEEPINFRA_API_KEY=... @@ -14,9 +14,12 @@ GROQ_API_KEY=gsk_... CEREBRAS_API_KEY=... # --- Open WebUI --- -# Virtual key from LiteLLM (create in LiteLLM UI → Virtual Keys) +# API token created in new-api (or via init-channels.sh) OPENWEBUI_API_KEY=sk-... +# --- Grafana --- +GRAFANA_ADMIN_PASSWORD=change-me-to-a-secure-password + # --- Cloudflare Tunnel --- # Create a tunnel in Cloudflare Zero Trust dashboard → Networks → Tunnels # Copy the token from the tunnel install command diff --git a/docker-compose.yml b/docker-compose.yml index 9ca5a5e..b9b9d53 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -38,48 +38,68 @@ services: - ANONYMIZED_TELEMETRY=FALSE restart: unless-stopped - # ── Database for LiteLLM ── - litellm-db: - image: postgres:16-alpine - volumes: - - litellm-db-data:/var/lib/postgresql/data - environment: - - POSTGRES_DB=litellm - - POSTGRES_USER=litellm - - POSTGRES_PASSWORD=${LITELLM_DB_PASSWORD} - restart: unless-stopped - healthcheck: - test: ["CMD-SHELL", "pg_isready -U litellm"] - interval: 10s - timeout: 3s - retries: 3 + # # ── Database for LiteLLM (DEPRECATED — kept for rollback) ── + # litellm-db: + # image: postgres:16-alpine + # volumes: + # - litellm-db-data:/var/lib/postgresql/data + # environment: + # - POSTGRES_DB=litellm + # - POSTGRES_USER=litellm + # - POSTGRES_PASSWORD=${LITELLM_DB_PASSWORD} + # restart: unless-stopped + # healthcheck: + # test: ["CMD-SHELL", "pg_isready -U litellm"] + # interval: 
10s + # timeout: 3s + # retries: 3 - # ── LLM API proxy ── - litellm: - image: ghcr.io/berriai/litellm:main-latest - command: ["--config", "/app/config.yaml", "--port", "4000"] - volumes: - - ./litellm/config.yaml:/app/config.yaml:ro + # # ── LLM API proxy (DEPRECATED — replaced by new-api) ── + # litellm: + # image: ghcr.io/berriai/litellm:main-latest + # command: ["--config", "/app/config.yaml", "--port", "4000"] + # volumes: + # - ./litellm/config.yaml:/app/config.yaml:ro + # ports: + # - "0.0.0.0:4000:4000" + # environment: + # - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY} + # - DATABASE_URL=postgresql://litellm:${LITELLM_DB_PASSWORD}@litellm-db:5432/litellm + # - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} + # - SILICONFLOW_API_KEY=${SILICONFLOW_API_KEY} + # - DEEPINFRA_API_KEY=${DEEPINFRA_API_KEY} + # - GROQ_API_KEY=${GROQ_API_KEY} + # - CEREBRAS_API_KEY=${CEREBRAS_API_KEY} + # depends_on: + # litellm-db: + # condition: service_healthy + # restart: unless-stopped + # healthcheck: + # test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"] + # interval: 15s + # timeout: 5s + # retries: 5 + # start_period: 30s + + # ── LLM API proxy (new-api) ── + new-api: + image: calciumion/new-api:latest ports: - - "0.0.0.0:4000:4000" + - "0.0.0.0:4000:3000" + volumes: + - new-api-data:/data environment: - - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY} - - DATABASE_URL=postgresql://litellm:${LITELLM_DB_PASSWORD}@litellm-db:5432/litellm - - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} - - SILICONFLOW_API_KEY=${SILICONFLOW_API_KEY} - - DEEPINFRA_API_KEY=${DEEPINFRA_API_KEY} - - GROQ_API_KEY=${GROQ_API_KEY} - - CEREBRAS_API_KEY=${CEREBRAS_API_KEY} - depends_on: - litellm-db: - condition: service_healthy + - SQL_DSN= + - TZ=UTC + - ENABLE_METRIC=true + - INITIAL_ROOT_ACCESS_TOKEN=${NEW_API_ACCESS_TOKEN} restart: unless-stopped healthcheck: - test: ["CMD", "python", "-c", "import urllib.request; 
urllib.request.urlopen('http://localhost:4000/health/liveliness')"] + test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/api/status"] interval: 15s timeout: 5s retries: 5 - start_period: 30s + start_period: 10s # ── Chat UI ── open-webui: @@ -90,7 +110,7 @@ services: - "0.0.0.0:3000:8080" environment: - OLLAMA_BASE_URL= - - OPENAI_API_BASE_URL=http://litellm:4000/v1 + - OPENAI_API_BASE_URL=http://new-api:3000/v1 - OPENAI_API_KEY=${OPENWEBUI_API_KEY} - ENABLE_RAG_WEB_SEARCH=true - RAG_WEB_SEARCH_ENGINE=searxng @@ -99,7 +119,7 @@ services: - CHROMA_HTTP_PORT=8000 - WEBUI_AUTH=true depends_on: - litellm: + new-api: condition: service_healthy restart: unless-stopped @@ -128,9 +148,69 @@ services: restart: unless-stopped network_mode: host + # ═══════════════════════════════════════════════ + # Monitoring stack + # ═══════════════════════════════════════════════ + + # ── Metrics store (Prometheus-compatible) ── + victoriametrics: + image: victoriametrics/victoria-metrics:latest + volumes: + - victoriametrics-data:/victoria-metrics-data + - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro + command: + - "-promscrape.config=/etc/prometheus/prometheus.yml" + - "-retentionPeriod=90d" + - "-storageDataPath=/victoria-metrics-data" + ports: + - "127.0.0.1:8428:8428" + restart: unless-stopped + + # ── Dashboards ── + grafana: + image: grafana/grafana:latest + volumes: + - grafana-data:/var/lib/grafana + ports: + - "0.0.0.0:3001:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD} + - GF_USERS_ALLOW_SIGN_UP=false + depends_on: + - victoriametrics + restart: unless-stopped + + # ── Host system metrics ── + node-exporter: + image: prom/node-exporter:latest + pid: host + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - "--path.procfs=/host/proc" + - "--path.sysfs=/host/sys" + - "--path.rootfs=/rootfs" + - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)" + restart: 
unless-stopped + + # ── Valkey/Redis metrics ── + redis-exporter: + image: oliver006/redis_exporter:latest + environment: + - REDIS_ADDR=redis://valkey:6379 + depends_on: + valkey: + condition: service_healthy + restart: unless-stopped + volumes: valkey-data: chromadb-data: litellm-db-data: + new-api-data: open-webui-data: tailscale-state: + victoriametrics-data: + grafana-data: diff --git a/monitoring/prometheus.yml b/monitoring/prometheus.yml new file mode 100644 index 0000000..a992d6a --- /dev/null +++ b/monitoring/prometheus.yml @@ -0,0 +1,15 @@ +global: + scrape_interval: 30s + +scrape_configs: + - job_name: 'new-api' + static_configs: + - targets: ['new-api:3000'] + + - job_name: 'node' + static_configs: + - targets: ['node-exporter:9100'] + + - job_name: 'valkey' + static_configs: + - targets: ['redis-exporter:9121'] diff --git a/new-api/CHANNELS.md b/new-api/CHANNELS.md new file mode 100644 index 0000000..231b1a3 --- /dev/null +++ b/new-api/CHANNELS.md @@ -0,0 +1,147 @@ +# new-api Channel Configuration + +After first start, access the new-api web UI at `http://<server-ip>:4000` to configure channels. + +Default admin credentials: `root` / `123456` — **change immediately**. + +## API Token for Open WebUI + +Create an API token in new-api's token management. Use this token as `OPENWEBUI_API_KEY` in `.env`. + +## Channels to Create + +Configure each channel via **Channels > Add Channel** in the web UI. + +### 1. DeepInfra (Priority 1) + +| Field | Value | +|---|---| +| Name | DeepInfra | +| Type | OpenAI | +| Base URL | `https://api.deepinfra.com/v1/openai` | +| Key | `$DEEPINFRA_API_KEY` | +| Priority | 1 | +| Models | See model mapping below | + +### 2. SiliconFlow (Priority 2) + +| Field | Value | +|---|---| +| Name | SiliconFlow | +| Type | OpenAI | +| Base URL | `https://api.siliconflow.com/v1` | +| Key | `$SILICONFLOW_API_KEY` | +| Priority | 2 | +| Models | See model mapping below | + +### 3. 
OpenRouter (Priority 3) + +| Field | Value | +|---|---| +| Name | OpenRouter | +| Type | OpenAI | +| Base URL | `https://openrouter.ai/api/v1` | +| Key | `$OPENROUTER_API_KEY` | +| Priority | 3 | +| Models | See model mapping below | + +### 4. Groq (Priority 1) + +| Field | Value | +|---|---| +| Name | Groq | +| Type | OpenAI | +| Base URL | `https://api.groq.com/openai/v1` | +| Key | `$GROQ_API_KEY` | +| Priority | 1 | +| Models | `llama-3.3-70b` | + +### 5. Cerebras (Priority 1) + +| Field | Value | +|---|---| +| Name | Cerebras | +| Type | OpenAI | +| Base URL | `https://api.cerebras.ai/v1` | +| Key | `$CEREBRAS_API_KEY` | +| Priority | 1 | +| Models | `llama-3.3-70b-cerebras` | + +## Model Mapping per Channel + +new-api uses model aliasing: the "model name" is what clients see, the "actual model" is what's sent to the provider. + +### DeepInfra Models + +| Client Model Name | Actual Provider Model | +|---|---| +| `deepseek-v3.2` | `deepseek-ai/DeepSeek-V3.2` | +| `deepseek-r1` | `deepseek-ai/DeepSeek-R1` | +| `gpt-oss` | `openai/gpt-oss-120b` | +| `gpt-oss-20b` | `openai/gpt-oss-20b` | +| `nemotron-super` | `nvidia/Llama-3.3-Nemotron-Super-49B-v1.5` | +| `nemotron-nano` | `nvidia/NVIDIA-Nemotron-Nano-9B-v2` | +| `devstral` | `mistralai/Devstral-Small-2505` | +| `glm-4.6` | `zai-org/GLM-4.6` | +| `glm-4.7` | `zai-org/GLM-4.7` | +| `glm-5` | `zai-org/GLM-5` | +| `kimi-k2` | `moonshotai/Kimi-K2-Instruct-0905` | +| `kimi-k2.5` | `moonshotai/Kimi-K2.5` | +| `deepseek-v3-free` | `deepseek-ai/DeepSeek-V3` | + +### SiliconFlow Models + +| Client Model Name | Actual Provider Model | +|---|---| +| `deepseek-v3.2` | `deepseek-ai/DeepSeek-V3.2` | +| `glm-4.7` | `THUDM/GLM-4-32B-0414` | +| `kimi-k2` | `moonshotai/Kimi-K2-Instruct-0905` | +| `qwen3-coder` | `Qwen/Qwen3-Coder-480B-A35B-Instruct` | +| `qwen3-coder-30b` | `Qwen/Qwen3-Coder-30B-A3B-Instruct` | + +### OpenRouter Models + +| Client Model Name | Actual Provider Model | +|---|---| +| `deepseek-v3.2` | 
`deepseek/deepseek-chat-v3-0324` | +| `deepseek-v3-free` | `deepseek/deepseek-chat-v3-0324:free` | +| `kimi-k2.5` | `moonshotai/kimi-k2.5` | +| `minimax-m2.5` | `minimax/minimax-m2.5` | +| `gpt-4.1-mini` | `openai/gpt-4.1-mini` | +| `gpt-4.1` | `openai/gpt-4.1` | +| `gemini-3-flash-preview` | `google/gemini-3-flash-preview` | +| `gemini-2.5-pro` | `google/gemini-2.5-pro-preview` | +| `claude-sonnet` | `anthropic/claude-sonnet-4` | +| `trinity-large-preview` | `arcee-ai/trinity-large-preview` | + +### Groq Models + +| Client Model Name | Actual Provider Model | +|---|---| +| `llama-3.3-70b` | `llama-3.3-70b-versatile` | + +### Cerebras Models + +| Client Model Name | Actual Provider Model | +|---|---| +| `llama-3.3-70b-cerebras` | `llama-3.3-70b` | + +## Fallback Behavior + +new-api handles fallbacks via priority levels: +- When a model exists on multiple channels, the highest priority (lowest number) channel is tried first +- If it fails, it automatically falls back to the next priority level + +For example, `deepseek-v3.2` exists on: +1. DeepInfra (priority 1) — tried first +2. SiliconFlow (priority 2) — fallback +3. OpenRouter (priority 3) — last resort + +## Grafana Setup + +After first start, access Grafana at `http://<server-ip>:3001`: +1. Login with `admin` / `$GRAFANA_ADMIN_PASSWORD` +2. Add data source: **Prometheus** with URL `http://victoriametrics:8428` +3. Import dashboards: + - Node Exporter Full: dashboard ID `1860` + - Redis: dashboard ID `763` diff --git a/new-api/init-channels.sh b/new-api/init-channels.sh new file mode 100755 index 0000000..97f4474 --- /dev/null +++ b/new-api/init-channels.sh @@ -0,0 +1,161 @@ +#!/usr/bin/env bash +# Configures new-api channels and token via the admin API. 
+# Run once after first boot: ./new-api/init-channels.sh +# +# Requires these env vars (or .env file in project root): +# NEW_API_ACCESS_TOKEN - admin access token (set via INITIAL_ROOT_ACCESS_TOKEN) +# DEEPINFRA_API_KEY +# SILICONFLOW_API_KEY +# OPENROUTER_API_KEY +# GROQ_API_KEY +# CEREBRAS_API_KEY +# OPENWEBUI_API_KEY - token for Open WebUI to authenticate with new-api + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +ENV_FILE="${SCRIPT_DIR}/../.env" + +# Load .env if present +if [[ -f "$ENV_FILE" ]]; then + set -a + # shellcheck disable=SC1090 + source "$ENV_FILE" + set +a +fi + +API_BASE="${NEW_API_BASE:-http://localhost:4000}" +TOKEN="${NEW_API_ACCESS_TOKEN:?Set NEW_API_ACCESS_TOKEN (from INITIAL_ROOT_ACCESS_TOKEN)}" + +# ── Helper ────────────────────────────────────────────── +create_channel() { + local name="$1" type="$2" key="$3" base_url="$4" priority="$5" models="$6" model_mapping="$7" + + echo "Creating channel: ${name} (priority ${priority})..." + + local payload + payload=$(python3 -c " +import json, sys +print(json.dumps({ + 'type': int(sys.argv[1]), + 'name': sys.argv[2], + 'key': sys.argv[3], + 'base_url': sys.argv[4], + 'models': sys.argv[5], + 'model_mapping': sys.argv[6], + 'priority': int(sys.argv[7]), + 'status': 1, + 'group': 'default', + 'weight': 1, + 'auto_ban': 1 +})) +" "$type" "$name" "$key" "$base_url" "$models" "$model_mapping" "$priority") + + local resp http_code body + resp=$(curl -s -w "\n%{http_code}" \ + "${API_BASE}/api/channel/" \ + -H "Authorization: Bearer ${TOKEN}" \ + -H "Content-Type: application/json" \ + -d "$payload") + + http_code=$(echo "$resp" | tail -1) + body=$(echo "$resp" | sed '$d') + + if [[ "$http_code" == "200" ]]; then + echo " OK" + else + echo " FAILED (HTTP ${http_code})" + echo " ${body}" | head -c 500 + echo + fi +} + +# Wait for new-api to be ready +echo "Waiting for new-api at ${API_BASE}..." 
+for i in $(seq 1 30); do + if curl -sf "${API_BASE}/api/status" > /dev/null 2>&1; then + echo "new-api is ready." + break + fi + if [[ "$i" == "30" ]]; then + echo "ERROR: new-api did not become ready in time." + exit 1 + fi + sleep 2 +done + +# ── Channel: DeepInfra (priority 1) ──────────────────── +create_channel "DeepInfra" 1 \ + "${DEEPINFRA_API_KEY:?}" \ + "https://api.deepinfra.com/v1/openai" \ + 1 \ + "deepseek-v3.2,deepseek-r1,gpt-oss,gpt-oss-20b,nemotron-super,nemotron-nano,devstral,glm-4.6,glm-4.7,glm-5,kimi-k2,kimi-k2.5" \ + '{"deepseek-v3.2":"deepseek-ai/DeepSeek-V3.2","deepseek-r1":"deepseek-ai/DeepSeek-R1","gpt-oss":"openai/gpt-oss-120b","gpt-oss-20b":"openai/gpt-oss-20b","nemotron-super":"nvidia/Llama-3.3-Nemotron-Super-49B-v1.5","nemotron-nano":"nvidia/NVIDIA-Nemotron-Nano-9B-v2","devstral":"mistralai/Devstral-Small-2505","glm-4.6":"zai-org/GLM-4.6","glm-4.7":"zai-org/GLM-4.7","glm-5":"zai-org/GLM-5","kimi-k2":"moonshotai/Kimi-K2-Instruct-0905","kimi-k2.5":"moonshotai/Kimi-K2.5"}' + +# ── Channel: SiliconFlow (priority 2) ────────────────── +create_channel "SiliconFlow" 1 \ + "${SILICONFLOW_API_KEY:?}" \ + "https://api.siliconflow.com/v1" \ + 2 \ + "deepseek-v3.2,glm-4.7,kimi-k2,qwen3-coder,qwen3-coder-30b" \ + '{"deepseek-v3.2":"deepseek-ai/DeepSeek-V3.2","glm-4.7":"THUDM/GLM-4-32B-0414","kimi-k2":"moonshotai/Kimi-K2-Instruct-0905","qwen3-coder":"Qwen/Qwen3-Coder-480B-A35B-Instruct","qwen3-coder-30b":"Qwen/Qwen3-Coder-30B-A3B-Instruct"}' + +# ── Channel: OpenRouter (priority 3) ─────────────────── +create_channel "OpenRouter" 1 \ + "${OPENROUTER_API_KEY:?}" \ + "https://openrouter.ai/api/v1" \ + 3 \ + "deepseek-v3.2,deepseek-v3-free,kimi-k2.5,minimax-m2.5,gpt-4.1-mini,gpt-4.1,gemini-3-flash-preview,gemini-2.5-pro,claude-sonnet,trinity-large-preview" \ + 
'{"deepseek-v3.2":"deepseek/deepseek-chat-v3-0324","deepseek-v3-free":"deepseek/deepseek-chat-v3-0324:free","kimi-k2.5":"moonshotai/kimi-k2.5","minimax-m2.5":"minimax/minimax-m2.5","gpt-4.1-mini":"openai/gpt-4.1-mini","gpt-4.1":"openai/gpt-4.1","gemini-3-flash-preview":"google/gemini-3-flash-preview","gemini-2.5-pro":"google/gemini-2.5-pro-preview","claude-sonnet":"anthropic/claude-sonnet-4","trinity-large-preview":"arcee-ai/trinity-large-preview"}' + +# ── Channel: Groq (priority 1) ───────────────────────── +create_channel "Groq" 1 \ + "${GROQ_API_KEY:?}" \ + "https://api.groq.com/openai/v1" \ + 1 \ + "llama-3.3-70b" \ + '{"llama-3.3-70b":"llama-3.3-70b-versatile"}' + +# ── Channel: Cerebras (priority 1) ───────────────────── +create_channel "Cerebras" 1 \ + "${CEREBRAS_API_KEY:?}" \ + "https://api.cerebras.ai/v1" \ + 1 \ + "llama-3.3-70b-cerebras" \ + '{"llama-3.3-70b-cerebras":"llama-3.3-70b"}' + +# ── Create API token for Open WebUI ──────────────────── +if [[ -n "${OPENWEBUI_API_KEY:-}" ]]; then + echo "" + echo "Creating API token for Open WebUI..." + TOKEN_RESP=$(curl -s "${API_BASE}/api/token/" \ + -H "Authorization: Bearer ${TOKEN}" \ + -H "Content-Type: application/json" \ + -d "$(python3 -c " +import json +print(json.dumps({ + 'name': 'open-webui', + 'remain_quota': 0, + 'unlimited_quota': True +})) +")") + echo "Token response: ${TOKEN_RESP}" | head -c 500 + echo "" + echo "" + echo "NOTE: Use the token 'key' from the response above as OPENAI_API_KEY in Open WebUI." + echo " Or create a token manually in the new-api UI." +fi + +echo "" +echo "══════════════════════════════════════" +echo "Channel setup complete!" +echo "" +echo "Next steps:" +echo " 1. Verify channels at ${API_BASE} (login: root / 123456 — CHANGE THIS)" +echo " 2. 
Test a model:" +echo "    curl ${API_BASE}/v1/chat/completions \\" +echo "      -H 'Authorization: Bearer <your-api-token>' \\" +echo "      -H 'Content-Type: application/json' \\" +echo "      -d '{\"model\":\"deepseek-v3.2\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}'" +echo " 3. Check Open WebUI can see models" +echo "══════════════════════════════════════" diff --git a/searxng/settings.yml b/searxng/settings.yml index 8a3d656..58dbce8 100644 --- a/searxng/settings.yml +++ b/searxng/settings.yml @@ -1,7 +1,7 @@ use_default_settings: true general: -  instance_name: "SearXNG" +  instance_name: "SearRST" privacypolicy_url: false donation_url: false enable_metrics: false