# Patch summary. Everything above the first "diff --git" header is leading text that
# git apply / patch skip; the hunks below are unchanged.
#
#   .env.example
#     Adds placeholder entries PERPLEXITY_API_KEY and OLLAMA_CLOUD_API_KEY (plus a
#     comment saying where the Ollama Cloud key is obtained) after CEREBRAS_API_KEY.
#   docker-compose.yml
#     Adds PERPLEXITY_API_KEY and OLLAMA_CLOUD_API_KEY to the environment list that
#     already carries the OPENROUTER/GROQ/CEREBRAS keys, and removes the empty
#     OLLAMA_BASE_URL= entry from the environment list that sets
#     OPENAI_API_BASE_URL=http://janus:3000/v1.
#   janus.yaml
#     Adds providers "perplexity" (https://api.perplexity.ai) and "ollama-cloud"
#     (https://ollama.com/v1), each with priority 1 and timeout 120s.
#     Inserts an ollama-cloud route ahead of the existing route in: gpt-oss,
#     gpt-oss-20b, deepseek-v3.2, devstral-small, devstral-medium, glm-4.6, glm-4.7,
#     glm-5, kimi-k2.5, minimax-m2.5, gemini-3-flash-preview.
#     Appends models sonar, sonar-pro and sonar-reasoning, each routed to perplexity
#     with an explicit pricing entry, followed by a "TIER 12: Ollama Cloud" comment header.
#
# NOTE(review): line breaks and indentation in this copy of the diff appear to have been
#   collapsed to single spaces; it will not apply in this form - re-export before use.
# NOTE(review): no pricing line is added alongside the new ollama-cloud routes. Whether the
#   existing pricing entries then apply to those routes cannot be told from here - confirm
#   how janus accounts cost for them (the TIER 12 comment says access is by subscription).
# NOTE(review): the "TIER 12: Ollama Cloud" header is added with no model entries after it
#   in this diff - confirm that is intended.
# NOTE(review): ollama-cloud is listed first in every touched routes list; if route order
#   expresses preference, these models will presumably try Ollama Cloud before the
#   previous provider - confirm against janus routing semantics.
# NOTE(review): verify that tags with the "-cloud" / ":cloud" suffix are accepted when
#   calling https://ollama.com/v1 directly - TODO confirm against Ollama Cloud docs.
# NOTE(review): removing OLLAMA_BASE_URL= presumably changes how that service resolves
#   its Ollama endpoint (unset instead of empty) - confirm the consumer's default.
diff --git a/.env.example b/.env.example index 5c7b19d..511c277 100644 --- a/.env.example +++ b/.env.example @@ -19,6 +19,9 @@ SILICONFLOW_API_KEY=sk-... DEEPINFRA_API_KEY=... GROQ_API_KEY=gsk_... CEREBRAS_API_KEY=... +PERPLEXITY_API_KEY=pplx-... +# Ollama Cloud API key (https://ollama.com → Settings → Keys) +OLLAMA_CLOUD_API_KEY=... # --- Grafana --- GRAFANA_ADMIN_PASSWORD=change-me-to-a-secure-password diff --git a/docker-compose.yml b/docker-compose.yml index 8e78bff..651339d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -57,6 +57,8 @@ services: - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} - GROQ_API_KEY=${GROQ_API_KEY} - CEREBRAS_API_KEY=${CEREBRAS_API_KEY} + - PERPLEXITY_API_KEY=${PERPLEXITY_API_KEY} + - OLLAMA_CLOUD_API_KEY=${OLLAMA_CLOUD_API_KEY} depends_on: valkey: condition: service_healthy @@ -76,7 +78,6 @@ services: ports: - "0.0.0.0:3000:8080" environment: - - OLLAMA_BASE_URL= - OPENAI_API_BASE_URL=http://janus:3000/v1 - OPENAI_API_KEY=${OPENWEBUI_API_KEY} - ENABLE_RAG_WEB_SEARCH=true diff --git a/janus.yaml b/janus.yaml index 932b3ad..8b28bec 100644 --- a/janus.yaml +++ b/janus.yaml @@ -62,6 +62,16 @@ providers: api_key: "${CEREBRAS_API_KEY}" priority: 1 timeout: 120s + - name: perplexity + base_url: "https://api.perplexity.ai" + api_key: "${PERPLEXITY_API_KEY}" + priority: 1 + timeout: 120s + - name: ollama-cloud + base_url: "https://ollama.com/v1" + api_key: "${OLLAMA_CLOUD_API_KEY}" + priority: 1 + timeout: 120s models: # ═══ TIER 1: Free (OpenRouter free models, $0) ═══ @@ -143,6 +153,8 @@ models: - name: "gpt-oss" routes: + - provider: ollama-cloud + model: "gpt-oss:120b-cloud" - provider: groq model: "openai/gpt-oss-120b" pricing: { input: 0.15, output: 0.60 } @@ -155,6 +167,8 @@ models: - name: "gpt-oss-20b" routes: + - provider: ollama-cloud + model: "gpt-oss:20b-cloud" - provider: groq model: "openai/gpt-oss-20b" pricing: { input: 0.075, output: 0.30 } @@ -185,6 +199,8 @@ models: # ═══ TIER 3: DeepSeek V3.2 (cheapest flagship) ═══ 
- name: "deepseek-v3.2" routes: + - provider: ollama-cloud + model: "deepseek-v3.2:cloud" - provider: deepinfra model: "deepseek-ai/DeepSeek-V3.2" pricing: { input: 0.26, output: 0.38 } @@ -223,23 +239,31 @@ models: - name: "devstral-small" routes: + - provider: ollama-cloud + model: "devstral-small-2:24b-cloud" - provider: openrouter model: "mistralai/devstral-small" - name: "devstral-medium" routes: + - provider: ollama-cloud + model: "devstral-2:123b-cloud" - provider: openrouter model: "mistralai/devstral-medium" # ═══ TIER 6: GLM ═══ - name: "glm-4.6" routes: + - provider: ollama-cloud + model: "glm-4.6:cloud" - provider: deepinfra model: "zai-org/GLM-4.6" pricing: { input: 0.60, output: 1.90 } - name: "glm-4.7" routes: + - provider: ollama-cloud + model: "glm-4.7:cloud" - provider: deepinfra model: "zai-org/GLM-4.7" pricing: { input: 0.40, output: 1.75 } @@ -251,6 +275,8 @@ models: - name: "glm-5" routes: + - provider: ollama-cloud + model: "glm-5:cloud" - provider: deepinfra model: "zai-org/GLM-5" pricing: { input: 0.80, output: 2.56 } @@ -270,6 +296,8 @@ models: - name: "kimi-k2.5" routes: + - provider: ollama-cloud + model: "kimi-k2.5:cloud" - provider: deepinfra model: "moonshotai/Kimi-K2.5" pricing: { input: 0.45, output: 2.25 } @@ -291,6 +319,8 @@ models: # ═══ TIER 9: OpenRouter premium (paid) ═══ - name: "minimax-m2.5" routes: + - provider: ollama-cloud + model: "minimax-m2.5:cloud" - provider: openrouter model: "minimax/minimax-m2.5" @@ -306,6 +336,8 @@ models: - name: "gemini-3-flash-preview" routes: + - provider: ollama-cloud + model: "gemini-3-flash-preview:cloud" - provider: openrouter model: "google/gemini-3-flash-preview" @@ -370,3 +402,26 @@ models: routes: - provider: openrouter model: "anthropic/claude-sonnet-4" + + # ═══ TIER 11: Perplexity (online search models) ═══ + - name: "sonar" + routes: + - provider: perplexity + model: "sonar" + pricing: { input: 1.00, output: 1.00 } + + - name: "sonar-pro" + routes: + - provider: perplexity + 
model: "sonar-pro" + pricing: { input: 3.00, output: 15.00 } + + - name: "sonar-reasoning" + routes: + - provider: perplexity + model: "sonar-reasoning" + pricing: { input: 1.00, output: 5.00 } + + # ═══ TIER 12: Ollama Cloud ═══ + # Requires Ollama Cloud Pro ($20/mo) or Max ($100/mo) subscription. + # Get API key from: https://ollama.com → Settings → Keys