diff --git a/litellm/config.yaml b/litellm/config.yaml
deleted file mode 100644
index b8b6331..0000000
--- a/litellm/config.yaml
+++ /dev/null
@@ -1,233 +0,0 @@
-model_list:
-  # ═══════════════════════════════════════════════
-  # TIER 1: Free providers (try first)
-  # ═══════════════════════════════════════════════
-
-  # --- Groq (free tier, very fast) ---
-  - model_name: llama-3.3-70b
-    litellm_params:
-      model: groq/llama-3.3-70b-versatile
-      api_key: os.environ/GROQ_API_KEY
-
-  # --- Cerebras (free tier, very fast) ---
-  - model_name: llama-3.3-70b-cerebras
-    litellm_params:
-      model: cerebras/llama-3.3-70b
-      api_key: os.environ/CEREBRAS_API_KEY
-
-  # --- OpenRouter free models ---
-  - model_name: deepseek-v3-free
-    litellm_params:
-      model: openrouter/deepseek/deepseek-chat-v3-0324:free
-      api_key: os.environ/OPENROUTER_API_KEY
-
-  # ═══════════════════════════════════════════════
-  # TIER 2: DeepSeek V3.2 (cheapest first)
-  # ═══════════════════════════════════════════════
-
-  # DeepSeek V3.2 via DeepInfra ($0.26 in / $0.38 out per M)
-  - model_name: deepseek-v3.2
-    litellm_params:
-      model: deepinfra/deepseek-ai/DeepSeek-V3.2
-      api_key: os.environ/DEEPINFRA_API_KEY
-
-  # DeepSeek V3.2 fallback via SiliconFlow ($0.27 in / $0.42 out per M)
-  - model_name: deepseek-v3.2
-    litellm_params:
-      model: openai/deepseek-ai/DeepSeek-V3.2
-      api_base: https://api.siliconflow.com/v1
-      api_key: os.environ/SILICONFLOW_API_KEY
-
-  # ═══════════════════════════════════════════════
-  # TIER 3: Ultra-cheap DeepInfra models
-  # ═══════════════════════════════════════════════
-
-  # GPT-OSS-120B — OpenAI open-weight MoE ($0.05 in / $0.24 out per M)
-  - model_name: gpt-oss
-    litellm_params:
-      model: deepinfra/openai/gpt-oss-120b
-      api_key: os.environ/DEEPINFRA_API_KEY
-
-  # GPT-OSS-20B — lower latency variant ($0.04 in / $0.16 out per M)
-  - model_name: gpt-oss-20b
-    litellm_params:
-      model: deepinfra/openai/gpt-oss-20b
-      api_key: os.environ/DEEPINFRA_API_KEY
-
-  # Nemotron Super 49B — near-flagship quality ($0.10 in / $0.40 out per M)
-  - model_name: nemotron-super
-    litellm_params:
-      model: deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5
-      api_key: os.environ/DEEPINFRA_API_KEY
-
-  # Nemotron Nano 9B — dirt cheap for simple tasks ($0.04 in / $0.16 out per M)
-  - model_name: nemotron-nano
-    litellm_params:
-      model: deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2
-      api_key: os.environ/DEEPINFRA_API_KEY
-
-  # ═══════════════════════════════════════════════
-  # TIER 4: Other DeepInfra models
-  # ═══════════════════════════════════════════════
-
-  - model_name: deepseek-r1
-    litellm_params:
-      model: deepinfra/deepseek-ai/DeepSeek-R1
-      api_key: os.environ/DEEPINFRA_API_KEY
-
-  - model_name: devstral
-    litellm_params:
-      model: deepinfra/mistralai/Devstral-Small-2505
-      api_key: os.environ/DEEPINFRA_API_KEY
-
-  # ═══════════════════════════════════════════════
-  # TIER 5: GLM models (cheapest first)
-  # ═══════════════════════════════════════════════
-
-  # GLM-4.6 via DeepInfra ($0.60 in / $1.90 out per M)
-  - model_name: glm-4.6
-    litellm_params:
-      model: deepinfra/zai-org/GLM-4.6
-      api_key: os.environ/DEEPINFRA_API_KEY
-
-  # GLM-4.7 via DeepInfra ($0.40 in / $1.75 out per M)
-  - model_name: glm-4.7
-    litellm_params:
-      model: deepinfra/zai-org/GLM-4.7
-      api_key: os.environ/DEEPINFRA_API_KEY
-
-  # GLM-4.7 fallback via SiliconFlow
-  - model_name: glm-4.7
-    litellm_params:
-      model: openai/THUDM/GLM-4-32B-0414
-      api_base: https://api.siliconflow.com/v1
-      api_key: os.environ/SILICONFLOW_API_KEY
-
-  # GLM-5 via DeepInfra ($0.80 in / $2.56 out per M)
-  - model_name: glm-5
-    litellm_params:
-      model: deepinfra/zai-org/GLM-5
-      api_key: os.environ/DEEPINFRA_API_KEY
-
-  # ═══════════════════════════════════════════════
-  # TIER 6: Kimi K2 (cheapest first)
-  # ═══════════════════════════════════════════════
-
-  # Kimi K2 via DeepInfra ($0.50 in / $2.00 out per M)
-  - model_name: kimi-k2
-    litellm_params:
-      model: deepinfra/moonshotai/Kimi-K2-Instruct-0905
-      api_key: os.environ/DEEPINFRA_API_KEY
-
-  # Kimi K2 fallback via SiliconFlow ($0.58 in / $2.29 out per M)
-  - model_name: kimi-k2
-    litellm_params:
-      model: openai/moonshotai/Kimi-K2-Instruct-0905
-      api_base: https://api.siliconflow.com/v1
-      api_key: os.environ/SILICONFLOW_API_KEY
-
-  # ═══════════════════════════════════════════════
-  # TIER 7: SiliconFlow (Qwen)
-  # ═══════════════════════════════════════════════
-
-  # Qwen3 Coder 480B MoE via SiliconFlow ($1.14 in / $2.28 out per M)
-  - model_name: qwen3-coder
-    litellm_params:
-      model: openai/Qwen/Qwen3-Coder-480B-A35B-Instruct
-      api_base: https://api.siliconflow.com/v1
-      api_key: os.environ/SILICONFLOW_API_KEY
-
-  # Qwen3 Coder 30B — cheaper alternative for simpler tasks
-  - model_name: qwen3-coder-30b
-    litellm_params:
-      model: openai/Qwen/Qwen3-Coder-30B-A3B-Instruct
-      api_base: https://api.siliconflow.com/v1
-      api_key: os.environ/SILICONFLOW_API_KEY
-
-  # ═══════════════════════════════════════════════
-  # TIER 8: OpenRouter (most expensive, widest selection)
-  # ═══════════════════════════════════════════════
-
-  # Kimi K2.5 — DeepInfra is cheapest ($0.45 in / $2.25 out per M)
-  - model_name: kimi-k2.5
-    litellm_params:
-      model: deepinfra/moonshotai/Kimi-K2.5
-      api_key: os.environ/DEEPINFRA_API_KEY
-
-  # Kimi K2.5 fallback via OpenRouter
-  - model_name: kimi-k2.5
-    litellm_params:
-      model: openrouter/moonshotai/kimi-k2.5
-      api_key: os.environ/OPENROUTER_API_KEY
-
-  - model_name: minimax-m2.5
-    litellm_params:
-      model: openrouter/minimax/minimax-m2.5
-      api_key: os.environ/OPENROUTER_API_KEY
-
-  - model_name: gpt-4.1-mini
-    litellm_params:
-      model: openrouter/openai/gpt-4.1-mini
-      api_key: os.environ/OPENROUTER_API_KEY
-
-  - model_name: gemini-3-flash-preview
-    litellm_params:
-      model: openrouter/google/gemini-3-flash-preview
-      api_key: os.environ/OPENROUTER_API_KEY
-
-  - model_name: trinity-large-preview
-    litellm_params:
-      model: openrouter/arcee-ai/trinity-large-preview
-      api_key: os.environ/OPENROUTER_API_KEY
-
-  # --- OpenRouter premium models ---
-  - model_name: gemini-2.5-pro
-    litellm_params:
-      model: openrouter/google/gemini-2.5-pro-preview
-      api_key: os.environ/OPENROUTER_API_KEY
-
-  - model_name: claude-sonnet
-    litellm_params:
-      model: openrouter/anthropic/claude-sonnet-4
-      api_key: os.environ/OPENROUTER_API_KEY
-
-  - model_name: gpt-4.1
-    litellm_params:
-      model: openrouter/openai/gpt-4.1
-      api_key: os.environ/OPENROUTER_API_KEY
-
-  # DeepSeek V3.2 last-resort fallback via OpenRouter
-  - model_name: deepseek-v3.2
-    litellm_params:
-      model: openrouter/deepseek/deepseek-chat-v3-0324
-      api_key: os.environ/OPENROUTER_API_KEY
-
-general_settings:
-  master_key: os.environ/LITELLM_MASTER_KEY
-
-  # ── Model group fallbacks (when model misbehaves mid-stream) ──
-  fallbacks:
-    - deepseek-v3.2: [gpt-oss, kimi-k2]
-    - gpt-oss: [deepseek-v3.2, nemotron-super]
-    - kimi-k2: [deepseek-v3.2, gpt-oss]
-    - kimi-k2.5: [deepseek-v3.2, gpt-oss]
-    - glm-4.7: [deepseek-v3.2, gpt-oss]
-    - glm-5: [deepseek-v3.2, gpt-oss]
-
-litellm_settings:
-  drop_params: true
-  set_verbose: false
-  num_retries: 2
-  request_timeout: 600
-
-  # ── Response caching via Valkey (reuses SearXNG's instance) ──
-  cache: true
-  cache_params:
-    type: redis
-    host: valkey
-    port: 6379
-    ttl: 3600
-
-  # ── Budget limit: $3/day to prevent surprise bills ──
-  # max_budget: 3.0
-  # budget_duration: "1d"
diff --git a/new-api/CHANNELS.md b/new-api/CHANNELS.md
deleted file mode 100644
index 231b1a3..0000000
--- a/new-api/CHANNELS.md
+++ /dev/null
@@ -1,147 +0,0 @@
-# new-api Channel Configuration
-
-After first start, access the new-api web UI at `http://<host>:4000` to configure channels.
-
-Default admin credentials: `root` / `123456` — **change immediately**.
-
-## API Token for Open WebUI
-
-Create an API token in new-api's token management. Use this token as `OPENWEBUI_API_KEY` in `.env`.
-
-## Channels to Create
-
-Configure each channel via **Channels > Add Channel** in the web UI.
-
-### 1. DeepInfra (Priority 1)
-
-| Field | Value |
-|---|---|
-| Name | DeepInfra |
-| Type | OpenAI |
-| Base URL | `https://api.deepinfra.com/v1/openai` |
-| Key | `$DEEPINFRA_API_KEY` |
-| Priority | 1 |
-| Models | See model mapping below |
-
-### 2. SiliconFlow (Priority 2)
-
-| Field | Value |
-|---|---|
-| Name | SiliconFlow |
-| Type | OpenAI |
-| Base URL | `https://api.siliconflow.com/v1` |
-| Key | `$SILICONFLOW_API_KEY` |
-| Priority | 2 |
-| Models | See model mapping below |
-
-### 3. OpenRouter (Priority 3)
-
-| Field | Value |
-|---|---|
-| Name | OpenRouter |
-| Type | OpenAI |
-| Base URL | `https://openrouter.ai/api/v1` |
-| Key | `$OPENROUTER_API_KEY` |
-| Priority | 3 |
-| Models | See model mapping below |
-
-### 4. Groq (Priority 1)
-
-| Field | Value |
-|---|---|
-| Name | Groq |
-| Type | OpenAI |
-| Base URL | `https://api.groq.com/openai/v1` |
-| Key | `$GROQ_API_KEY` |
-| Priority | 1 |
-| Models | `llama-3.3-70b` |
-
-### 5. Cerebras (Priority 1)
-
-| Field | Value |
-|---|---|
-| Name | Cerebras |
-| Type | OpenAI |
-| Base URL | `https://api.cerebras.ai/v1` |
-| Key | `$CEREBRAS_API_KEY` |
-| Priority | 1 |
-| Models | `llama-3.3-70b-cerebras` |
-
-## Model Mapping per Channel
-
-new-api uses model aliasing: the "model name" is what clients see, the "actual model" is what's sent to the provider.
-
-### DeepInfra Models
-
-| Client Model Name | Actual Provider Model |
-|---|---|
-| `deepseek-v3.2` | `deepseek-ai/DeepSeek-V3.2` |
-| `deepseek-r1` | `deepseek-ai/DeepSeek-R1` |
-| `gpt-oss` | `openai/gpt-oss-120b` |
-| `gpt-oss-20b` | `openai/gpt-oss-20b` |
-| `nemotron-super` | `nvidia/Llama-3.3-Nemotron-Super-49B-v1.5` |
-| `nemotron-nano` | `nvidia/NVIDIA-Nemotron-Nano-9B-v2` |
-| `devstral` | `mistralai/Devstral-Small-2505` |
-| `glm-4.6` | `zai-org/GLM-4.6` |
-| `glm-4.7` | `zai-org/GLM-4.7` |
-| `glm-5` | `zai-org/GLM-5` |
-| `kimi-k2` | `moonshotai/Kimi-K2-Instruct-0905` |
-| `kimi-k2.5` | `moonshotai/Kimi-K2.5` |
-| `deepseek-v3-free` | `deepseek-ai/DeepSeek-V3` |
-
-### SiliconFlow Models
-
-| Client Model Name | Actual Provider Model |
-|---|---|
-| `deepseek-v3.2` | `deepseek-ai/DeepSeek-V3.2` |
-| `glm-4.7` | `THUDM/GLM-4-32B-0414` |
-| `kimi-k2` | `moonshotai/Kimi-K2-Instruct-0905` |
-| `qwen3-coder` | `Qwen/Qwen3-Coder-480B-A35B-Instruct` |
-| `qwen3-coder-30b` | `Qwen/Qwen3-Coder-30B-A3B-Instruct` |
-
-### OpenRouter Models
-
-| Client Model Name | Actual Provider Model |
-|---|---|
-| `deepseek-v3.2` | `deepseek/deepseek-chat-v3-0324` |
-| `deepseek-v3-free` | `deepseek/deepseek-chat-v3-0324:free` |
-| `kimi-k2.5` | `moonshotai/kimi-k2.5` |
-| `minimax-m2.5` | `minimax/minimax-m2.5` |
-| `gpt-4.1-mini` | `openai/gpt-4.1-mini` |
-| `gpt-4.1` | `openai/gpt-4.1` |
-| `gemini-3-flash-preview` | `google/gemini-3-flash-preview` |
-| `gemini-2.5-pro` | `google/gemini-2.5-pro-preview` |
-| `claude-sonnet` | `anthropic/claude-sonnet-4` |
-| `trinity-large-preview` | `arcee-ai/trinity-large-preview` |
-
-### Groq Models
-
-| Client Model Name | Actual Provider Model |
-|---|---|
-| `llama-3.3-70b` | `llama-3.3-70b-versatile` |
-
-### Cerebras Models
-
-| Client Model Name | Actual Provider Model |
-|---|---|
-| `llama-3.3-70b-cerebras` | `llama-3.3-70b` |
-
-## Fallback Behavior
-
-new-api handles fallbacks via priority levels:
-- When a model exists on multiple channels, the highest-priority (lowest number) channel is tried first
-- If it fails, it automatically falls back to the next priority level
-
-For example, `deepseek-v3.2` exists on:
-1. DeepInfra (priority 1) — tried first
-2. SiliconFlow (priority 2) — fallback
-3. OpenRouter (priority 3) — last resort
-
-## Grafana Setup
-
-After first start, access Grafana at `http://<host>:3001`:
-1. Log in with `admin` / `$GRAFANA_ADMIN_PASSWORD`
-2. Add data source: **Prometheus** with URL `http://victoriametrics:8428`
-3. Import dashboards:
-   - Node Exporter Full: dashboard ID `1860`
-   - Redis: dashboard ID `763`
diff --git a/new-api/init-channels.sh b/new-api/init-channels.sh
deleted file mode 100755
index 06363e9..0000000
--- a/new-api/init-channels.sh
+++ /dev/null
@@ -1,218 +0,0 @@
-#!/usr/bin/env bash
-# Configures new-api channels and API token via the admin API.
-# Run once after first boot: ./new-api/init-channels.sh
-#
-# Requires these env vars (or .env file in project root):
-#   NEW_API_PASSWORD    - admin password
-#   DEEPINFRA_API_KEY
-#   SILICONFLOW_API_KEY
-#   OPENROUTER_API_KEY
-#   GROQ_API_KEY
-#   CEREBRAS_API_KEY
-#
-# Optional:
-#   NEW_API_USERNAME    - admin username (default: root)
-#   NEW_API_BASE        - API base URL (default: http://localhost:4000)
-
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-ENV_FILE="${SCRIPT_DIR}/../.env"
-
-# Load .env if present
-if [[ -f "$ENV_FILE" ]]; then
-  set -a
-  # shellcheck disable=SC1090
-  source "$ENV_FILE"
-  set +a
-fi
-
-API_BASE="${NEW_API_BASE:-http://localhost:4000}"
-USERNAME="${NEW_API_USERNAME:-root}"
-PASSWORD="${NEW_API_PASSWORD:?Set NEW_API_PASSWORD to the admin password}"
-COOKIE_JAR=$(mktemp)
-USER_ID=""
-trap 'rm -f "$COOKIE_JAR"' EXIT
-
-# ── Login and get user ID ───────────────────────────────
-login() {
-  echo "Logging in as ${USERNAME}..."
-  local resp
-  resp=$(curl -s -c "$COOKIE_JAR" "${API_BASE}/api/user/login" \
-    -H "Content-Type: application/json" \
-    -d "$(python3 -c "
-import json, sys
-print(json.dumps({'username': sys.argv[1], 'password': sys.argv[2]}))
-" "$USERNAME" "$PASSWORD")")
-
-  local success
-  success=$(echo "$resp" | python3 -c "import sys,json; print(json.load(sys.stdin).get('success', False))")
-  if [[ "$success" != "True" ]]; then
-    echo "ERROR: Login failed: ${resp}"
-    exit 1
-  fi
-
-  USER_ID=$(echo "$resp" | python3 -c "import sys,json; print(json.load(sys.stdin)['data']['id'])")
-  echo "  Logged in (user ID: ${USER_ID})."
-}
-
-# ── API call helper (cookie + New-Api-User header) ─────
-api_call() {
-  local endpoint="$1"
-  shift
-  curl -s -b "$COOKIE_JAR" \
-    -H "New-Api-User: ${USER_ID}" \
-    -H "Content-Type: application/json" \
-    "${API_BASE}${endpoint}" "$@"
-}
-
-# ── Create channel ──────────────────────────────────────
-create_channel() {
-  local name="$1" type="$2" key="$3" base_url="$4" priority="$5" models="$6" model_mapping="$7"
-
-  echo "Creating channel: ${name} (priority ${priority})..."
-
-  local payload
-  payload=$(python3 -c "
-import json, sys
-print(json.dumps({
-    'type': int(sys.argv[1]),
-    'name': sys.argv[2],
-    'key': sys.argv[3],
-    'base_url': sys.argv[4],
-    'models': sys.argv[5],
-    'model_mapping': sys.argv[6],
-    'priority': int(sys.argv[7]),
-    'status': 1,
-    'group': 'default',
-    'weight': 1,
-    'auto_ban': 1
-}))
-" "$type" "$name" "$key" "$base_url" "$models" "$model_mapping" "$priority")
-
-  local resp success
-  resp=$(api_call "/api/channel/" -d "$payload")
-
-  success=$(echo "$resp" | python3 -c "import sys,json; print(json.load(sys.stdin).get('success', False))")
-  if [[ "$success" == "True" ]]; then
-    echo "  OK"
-  else
-    echo "  FAILED: ${resp}" | head -c 500
-    echo
-  fi
-}
-
-# ── Wait for new-api ────────────────────────────────────
-echo "Waiting for new-api at ${API_BASE}..."
-for i in $(seq 1 30); do
-  if curl -sf "${API_BASE}/" > /dev/null 2>&1; then
-    echo "new-api is ready."
-    break
-  fi
-  if [[ "$i" == "30" ]]; then
-    echo "ERROR: new-api did not become ready in time."
-    exit 1
-  fi
-  sleep 2
-done
-
-# ── Login ───────────────────────────────────────────────
-login
-
-# ── Generate system access token for future use ─────────
-echo ""
-echo "Generating system access token..."
-ACCESS_TOKEN_RESP=$(api_call "/api/user/token")
-ACCESS_TOKEN=$(echo "$ACCESS_TOKEN_RESP" | python3 -c "
-import sys, json
-data = json.load(sys.stdin)
-if data.get('success'):
-    print(data.get('data', ''))
-else:
-    print('')
-" 2>/dev/null || echo "")
-
-if [[ -n "$ACCESS_TOKEN" ]]; then
-  echo "  Access token: ${ACCESS_TOKEN}"
-  echo "  Save as NEW_API_ACCESS_TOKEN in .env for future API use."
-  echo "  Usage: -H 'Authorization: Bearer ${ACCESS_TOKEN}' -H 'New-Api-User: ${USER_ID}'"
-else
-  echo "  Could not generate access token (non-critical, using session)."
-fi
-
-# ── Channels ────────────────────────────────────────────
-
-create_channel "DeepInfra" 1 \
-  "${DEEPINFRA_API_KEY:?}" \
-  "https://api.deepinfra.com/v1/openai" \
-  1 \
-  "deepseek-v3.2,deepseek-r1,gpt-oss,gpt-oss-20b,nemotron-super,nemotron-nano,devstral,glm-4.6,glm-4.7,glm-5,kimi-k2,kimi-k2.5" \
-  '{"deepseek-v3.2":"deepseek-ai/DeepSeek-V3.2","deepseek-r1":"deepseek-ai/DeepSeek-R1","gpt-oss":"openai/gpt-oss-120b","gpt-oss-20b":"openai/gpt-oss-20b","nemotron-super":"nvidia/Llama-3.3-Nemotron-Super-49B-v1.5","nemotron-nano":"nvidia/NVIDIA-Nemotron-Nano-9B-v2","devstral":"mistralai/Devstral-Small-2505","glm-4.6":"zai-org/GLM-4.6","glm-4.7":"zai-org/GLM-4.7","glm-5":"zai-org/GLM-5","kimi-k2":"moonshotai/Kimi-K2-Instruct-0905","kimi-k2.5":"moonshotai/Kimi-K2.5"}'
-
-create_channel "SiliconFlow" 1 \
-  "${SILICONFLOW_API_KEY:?}" \
-  "https://api.siliconflow.com/v1" \
-  2 \
-  "deepseek-v3.2,glm-4.7,kimi-k2,qwen3-coder,qwen3-coder-30b" \
-  '{"deepseek-v3.2":"deepseek-ai/DeepSeek-V3.2","glm-4.7":"THUDM/GLM-4-32B-0414","kimi-k2":"moonshotai/Kimi-K2-Instruct-0905","qwen3-coder":"Qwen/Qwen3-Coder-480B-A35B-Instruct","qwen3-coder-30b":"Qwen/Qwen3-Coder-30B-A3B-Instruct"}'
-
-create_channel "OpenRouter" 1 \
-  "${OPENROUTER_API_KEY:?}" \
-  "https://openrouter.ai/api/v1" \
-  3 \
-  "deepseek-v3.2,deepseek-v3-free,kimi-k2.5,minimax-m2.5,gpt-4.1-mini,gpt-4.1,gemini-3-flash-preview,gemini-2.5-pro,claude-sonnet,trinity-large-preview" \
-  '{"deepseek-v3.2":"deepseek/deepseek-chat-v3-0324","deepseek-v3-free":"deepseek/deepseek-chat-v3-0324:free","kimi-k2.5":"moonshotai/kimi-k2.5","minimax-m2.5":"minimax/minimax-m2.5","gpt-4.1-mini":"openai/gpt-4.1-mini","gpt-4.1":"openai/gpt-4.1","gemini-3-flash-preview":"google/gemini-3-flash-preview","gemini-2.5-pro":"google/gemini-2.5-pro-preview","claude-sonnet":"anthropic/claude-sonnet-4","trinity-large-preview":"arcee-ai/trinity-large-preview"}'
-
-create_channel "Groq" 1 \
-  "${GROQ_API_KEY:?}" \
-  "https://api.groq.com/openai/v1" \
-  1 \
-  "llama-3.3-70b" \
-  '{"llama-3.3-70b":"llama-3.3-70b-versatile"}'
-
-create_channel "Cerebras" 1 \
-  "${CEREBRAS_API_KEY:?}" \
-  "https://api.cerebras.ai/v1" \
-  1 \
-  "llama-3.3-70b-cerebras" \
-  '{"llama-3.3-70b-cerebras":"llama-3.3-70b"}'
-
-# ── Create API token for Open WebUI ─────────────────────
-echo ""
-echo "Creating API token for Open WebUI..."
-TOKEN_RESP=$(api_call "/api/token/" -d "$(python3 -c "
-import json
-print(json.dumps({
-    'name': 'open-webui',
-    'remain_quota': 0,
-    'unlimited_quota': True
-}))
-")")
-
-TOKEN_KEY=$(echo "$TOKEN_RESP" | python3 -c "
-import sys, json
-data = json.load(sys.stdin)
-if data.get('success'):
-    print(data['data']['key'])
-else:
-    print('FAILED: ' + data.get('message', 'unknown error'))
-" 2>/dev/null || echo "FAILED: could not parse response")
-
-echo ""
-echo "══════════════════════════════════════"
-echo "Setup complete!"
-echo "" -if [[ "$TOKEN_KEY" != FAILED* ]]; then - echo "Open WebUI API key: ${TOKEN_KEY}" - echo "Set OPENWEBUI_API_KEY=${TOKEN_KEY} in your .env" - echo "" - echo "Test:" - echo " curl ${API_BASE}/v1/chat/completions \\" - echo " -H 'Authorization: Bearer ${TOKEN_KEY}' \\" - echo " -H 'Content-Type: application/json' \\" - echo " -d '{\"model\":\"deepseek-v3.2\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}'" -else - echo "Token creation: ${TOKEN_KEY}" - echo "Create a token manually in the new-api UI." -fi -echo "══════════════════════════════════════"