diff --git a/docker-compose.yml b/docker-compose.yml index 5b642ed..8e78bff 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,5 @@ services: - # ── Cache for SearXNG & LLM Gateway ── + # ── Cache for SearXNG & Janus ── valkey: image: valkey/valkey:8-alpine command: valkey-server --save 30 1 --loglevel warning @@ -39,13 +39,13 @@ services: restart: unless-stopped # ── LLM API proxy ── - llm-gateway: - build: ./llm-gateway + janus: + image: ghcr.io/rslib/janus:latest ports: - "0.0.0.0:4000:3000" volumes: - - ./data/llm-gateway:/data - - ./llm-gateway.yaml:/etc/llm-gateway/config.yaml:ro + - ./data/janus:/data + - ./janus.yaml:/etc/janus/config.yaml:ro environment: - SESSION_SECRET=${SESSION_SECRET} - ADMIN_USERNAME=${ADMIN_USERNAME} @@ -77,7 +77,7 @@ services: - "0.0.0.0:3000:8080" environment: - OLLAMA_BASE_URL= - - OPENAI_API_BASE_URL=http://llm-gateway:3000/v1 + - OPENAI_API_BASE_URL=http://janus:3000/v1 - OPENAI_API_KEY=${OPENWEBUI_API_KEY} - ENABLE_RAG_WEB_SEARCH=true - RAG_WEB_SEARCH_ENGINE=searxng @@ -86,7 +86,7 @@ services: - CHROMA_HTTP_PORT=8000 - WEBUI_AUTH=true depends_on: - llm-gateway: + janus: condition: service_healthy restart: unless-stopped diff --git a/janus.yaml b/janus.yaml new file mode 100644 index 0000000..932b3ad --- /dev/null +++ b/janus.yaml @@ -0,0 +1,372 @@ +server: + listen: "0.0.0.0:3000" + request_timeout: 300s + max_request_body_mb: 10 + session_secret: "${SESSION_SECRET}" + default_admin: + username: "${ADMIN_USERNAME}" + password: "${ADMIN_PASSWORD}" + +tokens: + - name: "open-webui" + key: "${OPENWEBUI_API_KEY}" + rate_limit_rpm: 0 # unlimited + daily_budget_usd: 0 + - name: "opencode" + key: "${OPENCODE_API_KEY}" + rate_limit_rpm: 0 # unlimited + daily_budget_usd: 0 + +pricing_lookup: + # url: "https://raw.githubusercontent.com/pydantic/genai-prices/main/prices/data_slim.json" # default + refresh_interval: 6h + +database: + path: "/data/janus.db" + retention_days: 90 + +debug: + enabled: true + retention_days: 90 
+ # data_dir: "/data" # defaults to directory of database.path + # max_body_bytes: 0 # 0 = unlimited (save full bodies) + +cache: + enabled: true + address: "valkey:6379" + ttl: 3600 + +providers: + - name: deepinfra + base_url: "https://api.deepinfra.com/v1/openai" + api_key: "${DEEPINFRA_API_KEY}" + priority: 1 + timeout: 120s + - name: siliconflow + base_url: "https://api.siliconflow.com/v1" + api_key: "${SILICONFLOW_API_KEY}" + priority: 2 + timeout: 120s + - name: openrouter + base_url: "https://openrouter.ai/api/v1" + api_key: "${OPENROUTER_API_KEY}" + priority: 3 + timeout: 120s + - name: groq + base_url: "https://api.groq.com/openai/v1" + api_key: "${GROQ_API_KEY}" + priority: 1 + timeout: 120s + - name: cerebras + base_url: "https://api.cerebras.ai/v1" + api_key: "${CEREBRAS_API_KEY}" + priority: 1 + timeout: 120s + +models: + # ═══ TIER 1: Free (OpenRouter free models, $0) ═══ + # NOTE: Commented out — free models are heavily rate-limited upstream. + # Uncomment if you want best-effort free access. 
# - name: "llama-3.3-70b-free" + # routes: + # - provider: openrouter + # model: "meta-llama/llama-3.3-70b-instruct:free" + # - name: "deepseek-r1-free" + # routes: + # - provider: openrouter + # model: "deepseek/deepseek-r1-0528:free" + # - name: "gpt-oss-free" + # routes: + # - provider: openrouter + # model: "openai/gpt-oss-120b:free" + # - name: "gpt-oss-20b-free" + # routes: + # - provider: openrouter + # model: "openai/gpt-oss-20b:free" + # - name: "qwen3-coder-free" + # routes: + # - provider: openrouter + # model: "qwen/qwen3-coder:free" + # - name: "qwen3-235b-free" + # routes: + # - provider: openrouter + # model: "qwen/qwen3-235b-a22b-thinking-2507" # FIXME(review): missing ":free" suffix — as written this is the paid model id, unlike every other entry in this block + # - name: "glm-4.5-air-free" + # routes: + # - provider: openrouter + # model: "z-ai/glm-4.5-air:free" + # - name: "nemotron-nano-free" + # routes: + # - provider: openrouter + # model: "nvidia/nemotron-nano-9b-v2:free" + # - name: "trinity-large-free" + # routes: + # - provider: openrouter + # model: "arcee-ai/trinity-large-preview:free" + # - name: "mistral-small-free" + # routes: + # - provider: openrouter + # model: "mistralai/mistral-small-3.1-24b-instruct:free" + # - name: "gemma-3-27b-free" + # routes: + # - provider: openrouter + # model: "google/gemma-3-27b-it:free" + # - name: "step-3.5-flash-free" + # routes: + # - provider: openrouter + # model: "stepfun/step-3.5-flash:free" + + # ═══ TIER 2: Low cost (Groq, Cerebras — free tier with rate limits) ═══ + - name: "llama-3.1-8b" + routes: + - provider: groq + model: "llama-3.1-8b-instant" + pricing: { input: 0.05, output: 0.08 } + - provider: cerebras + model: "llama3.1-8b" + pricing: { input: 0.10, output: 0.10 } + - provider: deepinfra + model: "meta-llama/Meta-Llama-3.1-8B-Instruct" + pricing: { input: 0.03, output: 0.05 } + + - name: "llama-3.3-70b" + routes: + - provider: deepinfra + model: "meta-llama/Llama-3.3-70B-Instruct-Turbo" + pricing: { input: 0.23, output: 0.40 } + - provider: groq + model: "llama-3.3-70b-versatile" + pricing: { 
input: 0.59, output: 0.79 } + - provider: cerebras + model: "llama-3.3-70b" + pricing: { input: 0.85, output: 1.20 } + + - name: "gpt-oss" + routes: + - provider: groq + model: "openai/gpt-oss-120b" + pricing: { input: 0.15, output: 0.60 } + - provider: cerebras + model: "gpt-oss-120b" + pricing: { input: 0.35, output: 0.75 } + - provider: deepinfra + model: "openai/gpt-oss-120b" + pricing: { input: 0.05, output: 0.24 } + + - name: "gpt-oss-20b" + routes: + - provider: groq + model: "openai/gpt-oss-20b" + pricing: { input: 0.075, output: 0.30 } + - provider: deepinfra + model: "openai/gpt-oss-20b" + pricing: { input: 0.04, output: 0.16 } + + - name: "llama-4-scout" + routes: + - provider: groq + model: "meta-llama/llama-4-scout-17b-16e-instruct" + pricing: { input: 0.11, output: 0.34 } + + - name: "llama-4-maverick" + routes: + - provider: groq + model: "meta-llama/llama-4-maverick-17b-128e-instruct" + pricing: { input: 0.20, output: 0.60 } + + - name: "qwen3-32b" + routes: + - provider: groq + model: "qwen/qwen3-32b" + pricing: { input: 0.29, output: 0.59 } + - provider: cerebras + model: "qwen-3-32b" + + # ═══ TIER 3: DeepSeek V3.2 (cheapest flagship) ═══ + - name: "deepseek-v3.2" + routes: + - provider: deepinfra + model: "deepseek-ai/DeepSeek-V3.2" + pricing: { input: 0.26, output: 0.38 } + - provider: siliconflow + model: "deepseek-ai/DeepSeek-V3.2" + pricing: { input: 0.27, output: 0.42 } + - provider: openrouter + model: "deepseek/deepseek-chat-v3-0324" + pricing: { input: 0.30, output: 0.88 } + + # ═══ TIER 4: Ultra-cheap DeepInfra ═══ + - name: "nemotron-super" + routes: + - provider: deepinfra + model: "nvidia/Llama-3.3-Nemotron-Super-49B-v1.5" + pricing: { input: 0.10, output: 0.40 } + + - name: "nemotron-nano" + routes: + - provider: deepinfra + model: "nvidia/NVIDIA-Nemotron-Nano-9B-v2" + pricing: { input: 0.04, output: 0.16 } + + # ═══ TIER 5: DeepSeek R1 & reasoning ═══ + - name: "deepseek-r1" + routes: + - provider: deepinfra + model: 
"deepseek-ai/DeepSeek-R1-0528" + - provider: openrouter + model: "deepseek/deepseek-r1" + + - name: "deepseek-r1-distill-llama-70b" + routes: + - provider: deepinfra + model: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B" + + - name: "devstral-small" + routes: + - provider: openrouter + model: "mistralai/devstral-small" + + - name: "devstral-medium" + routes: + - provider: openrouter + model: "mistralai/devstral-medium" + + # ═══ TIER 6: GLM ═══ + - name: "glm-4.6" + routes: + - provider: deepinfra + model: "zai-org/GLM-4.6" + pricing: { input: 0.60, output: 1.90 } + + - name: "glm-4.7" + routes: + - provider: deepinfra + model: "zai-org/GLM-4.7" + pricing: { input: 0.40, output: 1.75 } + - provider: cerebras + model: "zai-glm-4.7" + pricing: { input: 2.25, output: 2.75 } + - provider: siliconflow + model: "THUDM/GLM-4-32B-0414" # FIXME(review): GLM-4-32B-0414 is a different (older, smaller) model than GLM-4.7 — failover would silently serve the wrong model; confirm the intended SiliconFlow id or remove this route + + - name: "glm-5" + routes: + - provider: deepinfra + model: "zai-org/GLM-5" + pricing: { input: 0.80, output: 2.56 } + + # ═══ TIER 7: Kimi ═══ + - name: "kimi-k2" + routes: + - provider: groq + model: "moonshotai/kimi-k2-instruct-0905" + pricing: { input: 1.00, output: 3.00 } + - provider: deepinfra + model: "moonshotai/Kimi-K2-Instruct-0905" + pricing: { input: 0.50, output: 2.00 } + - provider: siliconflow + model: "moonshotai/Kimi-K2-Instruct-0905" + pricing: { input: 0.58, output: 2.29 } + + - name: "kimi-k2.5" + routes: + - provider: deepinfra + model: "moonshotai/Kimi-K2.5" + pricing: { input: 0.45, output: 2.25 } + - provider: openrouter + model: "moonshotai/kimi-k2.5" + + # ═══ TIER 8: SiliconFlow (Qwen) ═══ + - name: "qwen3-coder" + routes: + - provider: siliconflow + model: "Qwen/Qwen3-Coder-480B-A35B-Instruct" + pricing: { input: 1.14, output: 2.28 } + + - name: "qwen3-coder-30b" + routes: + - provider: siliconflow + model: "Qwen/Qwen3-Coder-30B-A3B-Instruct" + + # ═══ TIER 9: OpenRouter premium (paid) ═══ + - name: "minimax-m2.5" + routes: + - provider: openrouter + model: "minimax/minimax-m2.5" + + - name: "gpt-4.1-mini" + 
routes: + - provider: openrouter + model: "openai/gpt-4.1-mini" + + - name: "gpt-4.1" + routes: + - provider: openrouter + model: "openai/gpt-4.1" + + - name: "gemini-3-flash-preview" + routes: + - provider: openrouter + model: "google/gemini-3-flash-preview" + + - name: "gemini-2.5-pro" + routes: + - provider: openrouter + model: "google/gemini-2.5-pro-preview" + + # ═══ TIER 10: Vision / Multimodal ═══ + - name: "gemma-3-4b" + routes: + - provider: openrouter + model: "google/gemma-3-4b-it" + pricing: { input: 0.017, output: 0.068 } + - provider: deepinfra + model: "google/gemma-3-4b-it" + pricing: { input: 0.04, output: 0.08 } + + - name: "gemma-3-12b" + routes: + - provider: openrouter + model: "google/gemma-3-12b-it" + pricing: { input: 0.03, output: 0.10 } + - provider: deepinfra + model: "google/gemma-3-12b-it" + pricing: { input: 0.04, output: 0.13 } + + - name: "gemma-3-27b" + routes: + - provider: openrouter + model: "google/gemma-3-27b-it" + pricing: { input: 0.04, output: 0.15 } + - provider: deepinfra + model: "google/gemma-3-27b-it" + pricing: { input: 0.08, output: 0.16 } + + - name: "qwen3-vl-8b" + routes: + - provider: openrouter + model: "qwen/qwen3-vl-8b-instruct" + pricing: { input: 0.08, output: 0.50 } + - provider: deepinfra + model: "Qwen/Qwen3-VL-8B-Instruct" + pricing: { input: 0.18, output: 0.69 } + + - name: "qwen3-vl-32b" + routes: + - provider: openrouter + model: "qwen/qwen3-vl-32b-instruct" + pricing: { input: 0.104, output: 0.416 } + + - name: "qwen2.5-vl-32b" + routes: + - provider: openrouter + model: "qwen/qwen2.5-vl-32b-instruct" + pricing: { input: 0.05, output: 0.22 } + - provider: deepinfra + model: "Qwen/Qwen2.5-VL-32B-Instruct" + pricing: { input: 0.20, output: 0.60 } + + - name: "claude-sonnet" + routes: + - provider: openrouter + model: "anthropic/claude-sonnet-4" diff --git a/monitoring/prometheus.yml b/monitoring/prometheus.yml index 37588dc..7d44c13 100644 --- a/monitoring/prometheus.yml +++ b/monitoring/prometheus.yml 
@@ -2,9 +2,9 @@ global: scrape_interval: 30s scrape_configs: - - job_name: 'llm-gateway' + - job_name: 'janus' static_configs: - - targets: ['llm-gateway:3000'] + - targets: ['janus:3000'] - job_name: 'node' static_configs: