diff --git a/llm-gateway/cmd/gateway/main.go b/llm-gateway/cmd/gateway/main.go index a418431..b819d9c 100644 --- a/llm-gateway/cmd/gateway/main.go +++ b/llm-gateway/cmd/gateway/main.go @@ -90,6 +90,9 @@ func main() { } log.Printf("Registered %d models", len(cfg.Models)) + // Provider health tracker + healthTracker := provider.NewHealthTracker(5 * time.Minute) + // Auth store (static tokens checked in-memory, not seeded to DB) var staticTokens []auth.StaticToken for _, t := range cfg.Tokens { @@ -114,12 +117,20 @@ func main() { m := metrics.New() // Handlers - proxyHandler := proxy.NewHandler(registry, asyncLogger, c, m, cfg) + proxyHandler := proxy.NewHandler(registry, asyncLogger, c, m, cfg, healthTracker) modelsHandler := proxy.NewModelsHandler(registry) proxyAuth := proxy.NewAuthMiddleware(authStore) rateLimiter := proxy.NewRateLimiter(db) statsAPI := dashboard.NewStatsAPI(db, authStore) + statsAPI.SetHealthTracker(healthTracker) + if c != nil { + statsAPI.SetCache(c) + } dash := dashboard.NewDashboard(authStore, statsAPI) + dash.SetRegistry(registry) + if c != nil { + dash.SetCache(c) + } // Router r := chi.NewRouter() @@ -172,6 +183,8 @@ func main() { // Dashboard pages (HTMX) r.Get("/dashboard", dash.DashboardPage) + r.Get("/logs", dash.LogsPage) + r.Get("/models", dash.ModelsPage) r.Get("/tokens", dash.TokensPage) r.Get("/settings", dash.SettingsPage) @@ -205,6 +218,11 @@ func main() { r.Get("/api/stats/providers", statsAPI.Providers) r.Get("/api/stats/tokens", statsAPI.Tokens) r.Get("/api/stats/timeseries", statsAPI.Timeseries) + r.Get("/api/stats/logs", statsAPI.Logs) + r.Get("/api/stats/latency", statsAPI.Latency) + r.Get("/api/stats/cost-breakdown", statsAPI.CostBreakdown) + r.Get("/api/stats/provider-health", statsAPI.ProviderHealthHandler) + r.Get("/api/stats/cache", statsAPI.CacheStats) // Admin-only: user management r.Group(func(r chi.Router) { diff --git a/llm-gateway/internal/cache/cache.go b/llm-gateway/internal/cache/cache.go index 9d9df2a..40852c5 100644 --- a/llm-gateway/internal/cache/cache.go +++ b/llm-gateway/internal/cache/cache.go @@ -56,6 +56,120 @@ func (c *Cache) Close() error { return c.client.Close() } +// CacheStats holds cache statistics from the Valkey/Redis server. +type CacheStats struct { + Hits int64 `json:"hits"` + Misses int64 `json:"misses"` + HitRate float64 `json:"hit_rate"` + MemoryUsed string `json:"memory_used"` + Keys int64 `json:"keys"` + Connected bool `json:"connected"` +} + +// Stats returns cache statistics by querying Valkey/Redis INFO. +func (c *Cache) Stats(ctx context.Context) *CacheStats { + stats := &CacheStats{} + + // Check connectivity + if err := c.client.Ping(ctx).Err(); err != nil { + return stats + } + stats.Connected = true + + // Parse INFO stats for hits/misses + info, err := c.client.Info(ctx, "stats").Result() + if err == nil { + stats.Hits = parseInfoInt(info, "keyspace_hits") + stats.Misses = parseInfoInt(info, "keyspace_misses") + total := stats.Hits + stats.Misses + if total > 0 { + stats.HitRate = float64(stats.Hits) / float64(total) + } + } + + // Parse INFO memory + memInfo, err := c.client.Info(ctx, "memory").Result() + if err == nil { + stats.MemoryUsed = parseInfoString(memInfo, "used_memory_human") + } + + // Parse INFO keyspace + ksInfo, err := c.client.Info(ctx, "keyspace").Result() + if err == nil { + stats.Keys = parseKeyspaceKeys(ksInfo) + } + + return stats +} + +func parseInfoInt(info, key string) int64 { + prefix := key + ":" + for _, line := range splitLines(info) { + if len(line) > len(prefix) && line[:len(prefix)] == prefix { + var v int64 + fmt.Sscanf(line[len(prefix):], "%d", &v) + return v + } + } + return 0 +} + +func parseInfoString(info, key string) string { + prefix := key + ":" + for _, line := range splitLines(info) { + if len(line) > len(prefix) && line[:len(prefix)] == prefix { + val := line[len(prefix):] + // Trim trailing \r + if len(val) > 0 && val[len(val)-1] == '\r' { + val = val[:len(val)-1] + } + return val + } + } + return "" +} + +func parseKeyspaceKeys(info string) int64 { + // Format: db0:keys=123,expires=45,avg_ttl=6789 + for _, line := range splitLines(info) { + if len(line) > 3 && line[:2] == "db" { + prefix := "keys=" + idx := -1 + for i := 0; i <= len(line)-len(prefix); i++ { + if line[i:i+len(prefix)] == prefix { + idx = i + len(prefix) + break + } + } + if idx >= 0 { + end := idx + for end < len(line) && line[end] >= '0' && line[end] <= '9' { + end++ + } + var v int64 + fmt.Sscanf(line[idx:end], "%d", &v) + return v + } + } + } + return 0 +} + +func splitLines(s string) []string { + var lines []string + start := 0 + for i := 0; i < len(s); i++ { + if s[i] == '\n' { + lines = append(lines, s[start:i]) + start = i + 1 + } + } + if start < len(s) { + lines = append(lines, s[start:]) + } + return lines +} + func (c *Cache) cacheKey(model string, requestBody []byte) string { h := sha256.New() h.Write([]byte(model)) diff --git a/llm-gateway/internal/dashboard/api.go b/llm-gateway/internal/dashboard/api.go index 452ef27..3bb1b7a 100644 --- a/llm-gateway/internal/dashboard/api.go +++ b/llm-gateway/internal/dashboard/api.go @@ -3,9 +3,13 @@ package dashboard import ( "encoding/json" "net/http" + "sort" + "strconv" "time" "llm-gateway/internal/auth" + "llm-gateway/internal/cache" + "llm-gateway/internal/provider" "llm-gateway/internal/storage" ) @@ -52,15 +56,70 @@ type TokenUsageStats struct { CostUSD float64 `json:"cost_usd"` } +// RequestLogEntry represents a single request log row. +type RequestLogEntry struct { + Timestamp int64 `json:"timestamp"` + TokenName string `json:"token_name"` + Model string `json:"model"` + Provider string `json:"provider"` + ProviderModel string `json:"provider_model"` + InputTokens int `json:"input_tokens"` + OutputTokens int `json:"output_tokens"` + CostUSD float64 `json:"cost_usd"` + LatencyMS int64 `json:"latency_ms"` + Status string `json:"status"` + ErrorMessage string `json:"error_message"` + Streaming bool `json:"streaming"` + Cached bool `json:"cached"` +} + +// LogsResult holds paginated logs. +type LogsResult struct { + Logs []RequestLogEntry `json:"logs"` + Page int `json:"page"` + TotalPages int `json:"total_pages"` + Total int `json:"total"` +} + +// LatencyResult holds latency percentiles. +type LatencyResult struct { + P50 float64 `json:"p50"` + P95 float64 `json:"p95"` + P99 float64 `json:"p99"` + Avg float64 `json:"avg"` + Min float64 `json:"min"` + Max float64 `json:"max"` +} + +// CostBreakdownEntry holds cost data grouped by day and dimension. +type CostBreakdownEntry struct { + Day string `json:"day"` + GroupBy string `json:"group_by"` + CostUSD float64 `json:"cost_usd"` + Requests int `json:"requests"` +} + type StatsAPI struct { - db *storage.DB - authStore *auth.Store + db *storage.DB + authStore *auth.Store + healthTracker *provider.HealthTracker + cache *cache.Cache } func NewStatsAPI(db *storage.DB, authStore *auth.Store) *StatsAPI { return &StatsAPI{db: db, authStore: authStore} } +// SetHealthTracker sets the provider health tracker. +func (s *StatsAPI) SetHealthTracker(ht *provider.HealthTracker) { + s.healthTracker = ht +} + +// SetCache sets the cache for stats. +func (s *StatsAPI) SetCache(c *cache.Cache) { + s.cache = c +} + // TokenNamesForUser returns the token names that belong to the user. // Admins get nil (no filter), non-admins get their token names. func (s *StatsAPI) TokenNamesForUser(user *auth.User) []string { @@ -227,6 +286,217 @@ func (s *StatsAPI) GetTokenUsage(tokenNames []string) []TokenUsageStats { return results } +// GetLogs returns paginated request logs with filters. +func (s *StatsAPI) GetLogs(tokenNames []string, page int, model, token, status string) *LogsResult { + if page < 1 { + page = 1 + } + limit := 50 + offset := (page - 1) * limit + + tokenFilter, filterArgs := buildTokenFilter(tokenNames) + + where := "WHERE 1=1" + tokenFilter + args := make([]any, 0) + args = append(args, filterArgs...) + + if model != "" { + where += " AND model = ?" + args = append(args, model) + } + if token != "" { + where += " AND token_name = ?" + args = append(args, token) + } + if status != "" { + where += " AND status = ?" + args = append(args, status) + } + + // Get total count + var total int + countArgs := make([]any, len(args)) + copy(countArgs, args) + s.db.QueryRow("SELECT COUNT(*) FROM request_logs "+where, countArgs...).Scan(&total) + + totalPages := (total + limit - 1) / limit + if totalPages < 1 { + totalPages = 1 + } + + // Get page + query := `SELECT timestamp, token_name, model, provider, provider_model, + input_tokens, output_tokens, cost_usd, latency_ms, status, + COALESCE(error_message, ''), streaming, cached + FROM request_logs ` + where + ` ORDER BY timestamp DESC LIMIT ? OFFSET ?` + args = append(args, limit, offset) + + rows, err := s.db.Query(query, args...) + if err != nil { + return &LogsResult{Logs: []RequestLogEntry{}, Page: page, TotalPages: totalPages, Total: total} + } + defer rows.Close() + + var logs []RequestLogEntry + for rows.Next() { + var l RequestLogEntry + var streaming, cached int + rows.Scan(&l.Timestamp, &l.TokenName, &l.Model, &l.Provider, &l.ProviderModel, + &l.InputTokens, &l.OutputTokens, &l.CostUSD, &l.LatencyMS, &l.Status, + &l.ErrorMessage, &streaming, &cached) + l.Streaming = streaming == 1 + l.Cached = cached == 1 + logs = append(logs, l) + } + if logs == nil { + logs = []RequestLogEntry{} + } + + return &LogsResult{ + Logs: logs, + Page: page, + TotalPages: totalPages, + Total: total, + } +} + +// GetDistinctModels returns distinct model names from logs. +func (s *StatsAPI) GetDistinctModels() []string { + rows, err := s.db.Query("SELECT DISTINCT model FROM request_logs ORDER BY model") + if err != nil { + return nil + } + defer rows.Close() + var models []string + for rows.Next() { + var m string + rows.Scan(&m) + models = append(models, m) + } + return models +} + +// GetDistinctTokens returns distinct token names from logs. +func (s *StatsAPI) GetDistinctTokens() []string { + rows, err := s.db.Query("SELECT DISTINCT token_name FROM request_logs ORDER BY token_name") + if err != nil { + return nil + } + defer rows.Close() + var tokens []string + for rows.Next() { + var t string + rows.Scan(&t) + tokens = append(tokens, t) + } + return tokens +} + +// GetLatency computes latency percentiles from request_logs. +func (s *StatsAPI) GetLatency(tokenNames []string, period, model, providerName string) *LatencyResult { + var since int64 + switch period { + case "7d": + since = time.Now().AddDate(0, 0, -7).Unix() + case "30d": + since = time.Now().AddDate(0, -1, 0).Unix() + default: + since = time.Now().Add(-24 * time.Hour).Unix() + } + + tokenFilter, filterArgs := buildTokenFilter(tokenNames) + + where := "WHERE timestamp >= ? AND status = 'success'" + tokenFilter + args := []any{since} + args = append(args, filterArgs...) + + if model != "" { + where += " AND model = ?" + args = append(args, model) + } + if providerName != "" { + where += " AND provider = ?" + args = append(args, providerName) + } + + rows, err := s.db.Query("SELECT latency_ms FROM request_logs "+where+" ORDER BY latency_ms", args...) + if err != nil { + return &LatencyResult{} + } + defer rows.Close() + + var latencies []float64 + for rows.Next() { + var l float64 + rows.Scan(&l) + latencies = append(latencies, l) + } + + if len(latencies) == 0 { + return &LatencyResult{} + } + + sort.Float64s(latencies) + n := len(latencies) + var sum float64 + for _, l := range latencies { + sum += l + } + + return &LatencyResult{ + P50: latencies[n*50/100], + P95: latencies[n*95/100], + P99: latencies[min(n*99/100, n-1)], + Avg: sum / float64(n), + Min: latencies[0], + Max: latencies[n-1], + } +} + +// GetCostBreakdown returns cost data grouped by day and dimension. +func (s *StatsAPI) GetCostBreakdown(tokenNames []string, period, groupBy string) []CostBreakdownEntry { + var since int64 + switch period { + case "30d": + since = time.Now().AddDate(0, -1, 0).Unix() + case "7d": + since = time.Now().AddDate(0, 0, -7).Unix() + default: + since = time.Now().Add(-24 * time.Hour).Unix() + } + + tokenFilter, filterArgs := buildTokenFilter(tokenNames) + + groupCol := "model" + if groupBy == "token" { + groupCol = "token_name" + } else if groupBy == "provider" { + groupCol = "provider" + } + + args := []any{since} + args = append(args, filterArgs...) + + query := `SELECT date(timestamp, 'unixepoch') as day, ` + groupCol + `, + COALESCE(SUM(cost_usd), 0), COUNT(*) + FROM request_logs WHERE timestamp >= ?` + tokenFilter + ` + GROUP BY day, ` + groupCol + ` ORDER BY day, ` + groupCol + + rows, err := s.db.Query(query, args...) + if err != nil { + return nil + } + defer rows.Close() + + var results []CostBreakdownEntry + for rows.Next() { + var e CostBreakdownEntry + rows.Scan(&e.Day, &e.GroupBy, &e.CostUSD, &e.Requests) + results = append(results, e) + } + return results +} + // JSON HTTP handlers (thin wrappers). func (s *StatsAPI) Summary(w http.ResponseWriter, r *http.Request) { @@ -302,6 +572,58 @@ func (s *StatsAPI) Timeseries(w http.ResponseWriter, r *http.Request) { writeJSON(w, results) } +// Logs serves the paginated logs API. +func (s *StatsAPI) Logs(w http.ResponseWriter, r *http.Request) { + tokenNames := s.tokenNamesForUser(r) + page, _ := strconv.Atoi(r.URL.Query().Get("page")) + model := r.URL.Query().Get("model") + token := r.URL.Query().Get("token") + status := r.URL.Query().Get("status") + result := s.GetLogs(tokenNames, page, model, token, status) + writeJSON(w, result) +} + +// Latency serves latency percentiles API. +func (s *StatsAPI) Latency(w http.ResponseWriter, r *http.Request) { + tokenNames := s.tokenNamesForUser(r) + period := r.URL.Query().Get("period") + model := r.URL.Query().Get("model") + providerName := r.URL.Query().Get("provider") + result := s.GetLatency(tokenNames, period, model, providerName) + writeJSON(w, result) +} + +// CostBreakdown serves cost breakdown API. +func (s *StatsAPI) CostBreakdown(w http.ResponseWriter, r *http.Request) { + tokenNames := s.tokenNamesForUser(r) + period := r.URL.Query().Get("period") + groupBy := r.URL.Query().Get("group_by") + if groupBy == "" { + groupBy = "model" + } + result := s.GetCostBreakdown(tokenNames, period, groupBy) + writeJSON(w, result) +} + +// ProviderHealthHandler serves provider health status API. +func (s *StatsAPI) ProviderHealthHandler(w http.ResponseWriter, r *http.Request) { + if s.healthTracker == nil { + writeJSON(w, []provider.ProviderHealth{}) + return + } + writeJSON(w, s.healthTracker.Status()) +} + +// CacheStats serves cache statistics API. +func (s *StatsAPI) CacheStats(w http.ResponseWriter, r *http.Request) { + if s.cache == nil { + writeJSON(w, map[string]any{"enabled": false}) + return + } + stats := s.cache.Stats(r.Context()) + writeJSON(w, stats) +} + func writeJSON(w http.ResponseWriter, v any) { w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(v) diff --git a/llm-gateway/internal/dashboard/handler.go b/llm-gateway/internal/dashboard/handler.go index 6b246cd..af4e4f1 100644 --- a/llm-gateway/internal/dashboard/handler.go +++ b/llm-gateway/internal/dashboard/handler.go @@ -5,9 +5,12 @@ import ( "fmt" "html/template" "net/http" + "strconv" "time" "llm-gateway/internal/auth" + "llm-gateway/internal/cache" + "llm-gateway/internal/provider" ) //go:embed templates/*.html templates/partials/*.html @@ -20,9 +23,18 @@ var templateFuncs = template.FuncMap{ } return time.Unix(ts, 0).Format("2006-01-02") }, + "formatTimeDetail": func(ts int64) string { + if ts == 0 { + return "never" + } + return time.Unix(ts, 0).Format("2006-01-02 15:04:05") + }, "addInt": func(a, b int) int { return a + b }, + "subInt": func(a, b int) int { + return a - b + }, "formatCost": func(v float64) string { if v == 0 { return "$0.00" @@ -32,19 +44,87 @@ var templateFuncs = template.FuncMap{ } return fmt.Sprintf("$%.4f", v) }, + "formatPrice": func(v float64) string { + if v == 0 { + return "-" + } + return fmt.Sprintf("$%.2f", v) + }, + "formatPct": func(v float64) string { + return fmt.Sprintf("%.1f%%", v*100) + }, + "budgetPct": func(spend, budget float64) float64 { + if budget <= 0 { + return 0 + } + return spend / budget * 100 + }, + "budgetColor": func(pct float64) string { + if pct >= 80 { + return "#f87171" + } + if pct >= 50 { + return "#fbbf24" + } + return "#4ade80" + }, + "seq": func(start, end int) []int { + var s []int + for i := start; i <= end; i++ { + s = append(s, i) + } + return s + }, + "paginationStart": func(page, totalPages int) int { + start := page - 2 + if start < 1 { + start = 1 + } + if totalPages-start < 4 && totalPages > 4 { + start = totalPages - 4 + } + return start + }, + "paginationEnd": func(page, totalPages int) int { + start := page - 2 + if start < 1 { + start = 1 + } + end := start + 4 + if end > totalPages { + end = totalPages + } + return end + }, } // PageData is the common data passed to all templates. type PageData struct { ActivePage string User *auth.User - // Page-specific data - Summary *SummaryResult - Models []ModelStats - Providers []ProviderStats - TokenStats []TokenUsageStats + // Dashboard data + Summary *SummaryResult + Models []ModelStats + Providers []ProviderStats + TokenStats []TokenUsageStats + ProviderHealth []provider.ProviderHealth + Latency *LatencyResult + CacheEnabled bool + CacheInfo *cache.CacheStats + // Tokens page data Tokens []auth.APIToken - Users []auth.User + TokenSpend map[string]float64 + // Users page data + Users []auth.User + // Logs page data + LogsResult *LogsResult + LogModels []string + LogTokens []string + FilterModel string + FilterToken string + FilterStatus string + // Models routing page data + ModelRoutes []provider.ModelRouteInfo } // Dashboard serves the HTMX-based dashboard pages. @@ -52,6 +132,8 @@ type Dashboard struct { templates *template.Template authStore *auth.Store statsAPI *StatsAPI + registry *provider.Registry + cache *cache.Cache } // NewDashboard creates a new Dashboard handler. @@ -70,6 +152,16 @@ func NewDashboard(authStore *auth.Store, statsAPI *StatsAPI) *Dashboard { } } +// SetRegistry sets the provider registry for model routing display. +func (d *Dashboard) SetRegistry(r *provider.Registry) { + d.registry = r +} + +// SetCache sets the cache reference for cache stats display. +func (d *Dashboard) SetCache(c *cache.Cache) { + d.cache = c +} + // LoginPage serves the login page. func (d *Dashboard) LoginPage(w http.ResponseWriter, r *http.Request) { if !d.authStore.HasAnyUser() { @@ -106,11 +198,66 @@ func (d *Dashboard) DashboardPage(w http.ResponseWriter, r *http.Request) { Models: d.statsAPI.GetModels(tokenNames), Providers: d.statsAPI.GetProviders(tokenNames), TokenStats: d.statsAPI.GetTokenUsage(tokenNames), + Latency: d.statsAPI.GetLatency(tokenNames, "24h", "", ""), + } + + // Provider health + if d.statsAPI.healthTracker != nil { + data.ProviderHealth = d.statsAPI.healthTracker.Status() + } + + // Cache stats + if d.cache != nil { + data.CacheEnabled = true + data.CacheInfo = d.cache.Stats(r.Context()) } d.renderDashboardPage(w, r, "partials/dashboard.html", data) } +// LogsPage serves the request logs view. +func (d *Dashboard) LogsPage(w http.ResponseWriter, r *http.Request) { + user := auth.UserFromContext(r.Context()) + tokenNames := d.statsAPI.TokenNamesForUser(user) + + page, _ := strconv.Atoi(r.URL.Query().Get("page")) + if page < 1 { + page = 1 + } + model := r.URL.Query().Get("model") + token := r.URL.Query().Get("token") + status := r.URL.Query().Get("status") + + data := PageData{ + ActivePage: "logs", + User: user, + LogsResult: d.statsAPI.GetLogs(tokenNames, page, model, token, status), + LogModels: d.statsAPI.GetDistinctModels(), + LogTokens: d.statsAPI.GetDistinctTokens(), + FilterModel: model, + FilterToken: token, + FilterStatus: status, + } + + d.renderDashboardPage(w, r, "partials/logs.html", data) +} + +// ModelsPage serves the model routing table view. +func (d *Dashboard) ModelsPage(w http.ResponseWriter, r *http.Request) { + user := auth.UserFromContext(r.Context()) + + data := PageData{ + ActivePage: "models", + User: user, + } + + if d.registry != nil { + data.ModelRoutes = d.registry.AllRoutes() + } + + d.renderDashboardPage(w, r, "partials/models-page.html", data) +} + // TokensPage serves the tokens management view. func (d *Dashboard) TokensPage(w http.ResponseWriter, r *http.Request) { user := auth.UserFromContext(r.Context()) @@ -125,10 +272,17 @@ func (d *Dashboard) TokensPage(w http.ResponseWriter, r *http.Request) { tokens = []auth.APIToken{} } + // Get today's spend for budget display + spend, _ := d.statsAPI.db.TodaySpendAll() + if spend == nil { + spend = make(map[string]float64) + } + d.renderDashboardPage(w, r, "partials/tokens.html", PageData{ ActivePage: "tokens", User: user, Tokens: tokens, + TokenSpend: spend, }) } diff --git a/llm-gateway/internal/dashboard/templates/layout.html b/llm-gateway/internal/dashboard/templates/layout.html index e868774..66cc8ae 100644 --- a/llm-gateway/internal/dashboard/templates/layout.html +++ b/llm-gateway/internal/dashboard/templates/layout.html @@ -5,93 +5,228 @@ LLM Gateway + + +{{if .ProviderHealth}} +
+

Provider Health

+
+ {{range .ProviderHealth}} +
+ {{.Provider}} + {{.Status}} + {{printf "%.0f" .AvgLatency}}ms avg | {{formatPct .ErrorRate}} errors +
+ {{end}} +
+
+{{end}} + +{{if .Latency}}{{if gt .Latency.Max 0.0}} +
+
P50 Latency
{{printf "%.0f" .Latency.P50}}ms
+
P95 Latency
{{printf "%.0f" .Latency.P95}}ms
+
P99 Latency
{{printf "%.0f" .Latency.P99}}ms
+
Avg Latency
{{printf "%.0f" .Latency.Avg}}ms
+
+{{end}}{{end}} + +{{if .CacheEnabled}}{{if .CacheInfo}}{{if .CacheInfo.Connected}} +
+
Cache Hit Rate
{{formatPct .CacheInfo.HitRate}}
{{.CacheInfo.Hits}} hits / {{.CacheInfo.Misses}} misses
+
Cache Memory
{{.CacheInfo.MemoryUsed}}
+
Cached Keys
{{.CacheInfo.Keys}}
+
+{{end}}{{end}}{{end}} +
@@ -27,6 +59,16 @@
+
+

Cost Breakdown

+
+ + + +
+ +
+ {{if .Models}}

Models

@@ -88,7 +130,22 @@ {{end}}
{{end}} diff --git a/llm-gateway/internal/dashboard/templates/partials/logs.html b/llm-gateway/internal/dashboard/templates/partials/logs.html new file mode 100644 index 0000000..79dca12 --- /dev/null +++ b/llm-gateway/internal/dashboard/templates/partials/logs.html @@ -0,0 +1,120 @@ +{{define "content"}} + + +
+ + + + +
+ +
+ + + + + + + + + + + + + + + {{range $i, $log := .LogsResult.Logs}} + + + + + + + + + + + {{if $log.ErrorMessage}} + + + + {{end}} + {{end}} + {{if not .LogsResult.Logs}} + + {{end}} + +
TimeTokenModelProviderStatusLatencyTokensCost
{{formatTimeDetail $log.Timestamp}}{{$log.TokenName}}{{$log.Model}}{{$log.Provider}} + {{if eq $log.Status "success"}}success + {{else if eq $log.Status "error"}}error + {{else if eq $log.Status "cached"}}cached + {{else}}{{$log.Status}}{{end}} + {{if $log.Streaming}} stream{{end}} + {{$log.LatencyMS}}ms{{$log.InputTokens}} / {{$log.OutputTokens}}{{formatCost $log.CostUSD}}
+
{{$log.ErrorMessage}}
+
No logs found
+ + {{if gt .LogsResult.TotalPages 1}} + + {{end}} +
+ + +{{end}} diff --git a/llm-gateway/internal/dashboard/templates/partials/models-page.html b/llm-gateway/internal/dashboard/templates/partials/models-page.html new file mode 100644 index 0000000..c81109e --- /dev/null +++ b/llm-gateway/internal/dashboard/templates/partials/models-page.html @@ -0,0 +1,39 @@ +{{define "content"}} + + +{{if .ModelRoutes}} +{{range .ModelRoutes}} +
+

{{.Name}}

+ + + + + + + + + + + + {{range .Routes}} + + + + + + + + {{end}} + +
ProviderProvider ModelPriorityInput Price (per 1M)Output Price (per 1M)
{{.ProviderName}}{{.ProviderModel}}{{.Priority}}{{formatPrice .InputPrice}}{{formatPrice .OutputPrice}}
+
+{{end}} +{{else}} +
+ No models configured +
+{{end}} +{{end}} diff --git a/llm-gateway/internal/dashboard/templates/partials/tokens.html b/llm-gateway/internal/dashboard/templates/partials/tokens.html index 58ebbbf..7f330d2 100644 --- a/llm-gateway/internal/dashboard/templates/partials/tokens.html +++ b/llm-gateway/internal/dashboard/templates/partials/tokens.html @@ -10,9 +10,9 @@
-

Static Tokens (from config, managed via environment variables)

+

Static Tokens (from config, managed via environment variables)

- + {{range .Tokens}}{{if lt .ID 0}} @@ -20,6 +20,18 @@ + {{end}}{{end}} @@ -28,17 +40,28 @@
-

Dynamic Tokens (created via dashboard)

+

Dynamic Tokens (created via dashboard)

NamePrefixRate LimitBudget
NamePrefixRate LimitBudgetToday's Spend
{{.KeyPrefix}}... {{if eq .RateLimitRPM 0}}unlimited{{else}}{{.RateLimitRPM}} rpm{{end}} {{if gt .DailyBudgetUSD 0.0}}${{printf "%.2f" .DailyBudgetUSD}}{{else}}unlimited{{end}} + {{$spend := index $.TokenSpend .Name}} + {{if gt .DailyBudgetUSD 0.0}} + {{$pct := budgetPct $spend .DailyBudgetUSD}} +
+
+
${{printf "%.4f" $spend}} / ${{printf "%.2f" .DailyBudgetUSD}} ({{printf "%.1f" $pct}}%)
+
+ {{else}} + {{if gt $spend 0.0}}{{formatCost $spend}}{{else}}-{{end}} + {{end}} +
config
- + - {{$hasDynamic := false}} {{range .Tokens}}{{if gt .ID 0}} + diff --git a/llm-gateway/internal/provider/health.go b/llm-gateway/internal/provider/health.go new file mode 100644 index 0000000..ae6d97e --- /dev/null +++ b/llm-gateway/internal/provider/health.go @@ -0,0 +1,121 @@ +package provider + +import ( + "sync" + "time" +) + +// HealthEvent represents a single request outcome for a provider. +type HealthEvent struct { + Timestamp time.Time + LatencyMS int64 + IsError bool + ErrorMsg string +} + +// ProviderHealth is the computed health status for a provider. +type ProviderHealth struct { + Provider string `json:"provider"` + Status string `json:"status"` // healthy, degraded, down + ErrorRate float64 `json:"error_rate"` + AvgLatency float64 `json:"avg_latency_ms"` + Total int `json:"total"` + Errors int `json:"errors"` +} + +// HealthTracker tracks per-provider health using a sliding window. +type HealthTracker struct { + mu sync.RWMutex + windows map[string][]HealthEvent + windowDu time.Duration +} + +// NewHealthTracker creates a health tracker with the given window duration. +func NewHealthTracker(window time.Duration) *HealthTracker { + if window == 0 { + window = 5 * time.Minute + } + return &HealthTracker{ + windows: make(map[string][]HealthEvent), + windowDu: window, + } +} + +// Record adds a health event for a provider. +func (h *HealthTracker) Record(provider string, latencyMS int64, err error) { + event := HealthEvent{ + Timestamp: time.Now(), + LatencyMS: latencyMS, + IsError: err != nil, + } + if err != nil { + event.ErrorMsg = err.Error() + } + + h.mu.Lock() + defer h.mu.Unlock() + + h.windows[provider] = append(h.windows[provider], event) + h.prune(provider) +} + +// Status returns computed health for all tracked providers. +func (h *HealthTracker) Status() []ProviderHealth { + h.mu.RLock() + defer h.mu.RUnlock() + + cutoff := time.Now().Add(-h.windowDu) + var results []ProviderHealth + + for provider, events := range h.windows { + var total, errors int + var totalLatency int64 + + for _, e := range events { + if e.Timestamp.Before(cutoff) { + continue + } + total++ + totalLatency += e.LatencyMS + if e.IsError { + errors++ + } + } + + if total == 0 { + continue + } + + errorRate := float64(errors) / float64(total) + status := "healthy" + if errorRate >= 0.5 { + status = "down" + } else if errorRate >= 0.1 { + status = "degraded" + } + + results = append(results, ProviderHealth{ + Provider: provider, + Status: status, + ErrorRate: errorRate, + AvgLatency: float64(totalLatency) / float64(total), + Total: total, + Errors: errors, + }) + } + + return results +} + +// prune removes events outside the window. Must be called with lock held. +func (h *HealthTracker) prune(provider string) { + cutoff := time.Now().Add(-h.windowDu) + events := h.windows[provider] + i := 0 + for i < len(events) && events[i].Timestamp.Before(cutoff) { + i++ + } + if i > 0 { + h.windows[provider] = events[i:] + } +} diff --git a/llm-gateway/internal/provider/registry.go b/llm-gateway/internal/provider/registry.go index 8b9d001..1a2302b 100644 --- a/llm-gateway/internal/provider/registry.go +++ b/llm-gateway/internal/provider/registry.go @@ -70,3 +70,38 @@ func (r *Registry) Lookup(model string) ([]Route, bool) { func (r *Registry) ModelNames() []string { return r.order } + +// RouteInfo exposes route details for dashboard display. +type RouteInfo struct { + ProviderName string `json:"provider_name"` + ProviderModel string `json:"provider_model"` + Priority int `json:"priority"` + InputPrice float64 `json:"input_price"` + OutputPrice float64 `json:"output_price"` +} + +// ModelRouteInfo exposes a model and its routes for dashboard display. +type ModelRouteInfo struct { + Name string `json:"name"` + Routes []RouteInfo `json:"routes"` +} + +// AllRoutes returns all models and their routes in config order. +func (r *Registry) AllRoutes() []ModelRouteInfo { + results := make([]ModelRouteInfo, 0, len(r.order)) + for _, name := range r.order { + routes := r.routes[name] + info := ModelRouteInfo{Name: name} + for _, rt := range routes { + info.Routes = append(info.Routes, RouteInfo{ + ProviderName: rt.Provider.Name(), + ProviderModel: rt.ProviderModel, + Priority: rt.Priority, + InputPrice: rt.InputPrice, + OutputPrice: rt.OutputPrice, + }) + } + results = append(results, info) + } + return results +} diff --git a/llm-gateway/internal/proxy/handler.go b/llm-gateway/internal/proxy/handler.go index 99eb125..ba4f36a 100644 --- a/llm-gateway/internal/proxy/handler.go +++ b/llm-gateway/internal/proxy/handler.go @@ -41,20 +41,22 @@ func getAPIToken(ctx context.Context) *auth.APIToken { } type Handler struct { - registry *provider.Registry - logger *storage.AsyncLogger - cache *cache.Cache - metrics *metrics.Metrics - cfg *config.Config + registry *provider.Registry + logger *storage.AsyncLogger + cache *cache.Cache + metrics *metrics.Metrics + cfg *config.Config + healthTracker *provider.HealthTracker } -func NewHandler(registry *provider.Registry, logger *storage.AsyncLogger, c *cache.Cache, m *metrics.Metrics, cfg *config.Config) *Handler { +func NewHandler(registry *provider.Registry, logger *storage.AsyncLogger, c *cache.Cache, m *metrics.Metrics, cfg *config.Config, ht *provider.HealthTracker) *Handler { return &Handler{ - registry: registry, - logger: logger, - cache: c, - metrics: m, - cfg: cfg, + registry: registry, + logger: logger, + cache: c, + metrics: m, + cfg: cfg, + healthTracker: ht, } } @@ -117,6 +119,9 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, r *http.Request, req *p // Client error — don't retry h.metrics.RecordRequest(req.Model, route.Provider.Name(), tokenName, "error", latency, 0, 0, 0) h.logRequest(tokenName, req.Model, route.Provider.Name(), route.ProviderModel, 0, 0, 0, latency, "error", err.Error(), false, false) + if h.healthTracker != nil { + h.healthTracker.Record(route.Provider.Name(), latency, err) + } writeErrorRaw(w, pe.StatusCode, pe.Body) return } @@ -124,9 +129,16 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, r *http.Request, req *p log.Printf("Provider %s failed for %s: %v", route.Provider.Name(), req.Model, err) h.metrics.RecordRequest(req.Model, route.Provider.Name(), tokenName, "error", latency, 0, 0, 0) h.logRequest(tokenName, req.Model, route.Provider.Name(), route.ProviderModel, 0, 0, 0, latency, "error", err.Error(), false, false) + if h.healthTracker != nil { + h.healthTracker.Record(route.Provider.Name(), latency, err) + } continue } + if h.healthTracker != nil { + h.healthTracker.Record(route.Provider.Name(), latency, nil) + } + // Compute cost inputTokens, outputTokens := 0, 0 if resp.Usage != nil { diff --git a/llm-gateway/internal/proxy/stream.go b/llm-gateway/internal/proxy/stream.go index 39fb596..eb9e225 100644 --- a/llm-gateway/internal/proxy/stream.go +++ b/llm-gateway/internal/proxy/stream.go @@ -31,12 +31,19 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, req *prov latency := time.Since(start).Milliseconds() h.metrics.RecordRequest(req.Model, route.Provider.Name(), tokenName, "error", latency, 0, 0, 0) h.logRequest(tokenName, req.Model, route.Provider.Name(), route.ProviderModel, 0, 0, 0, latency, "error", err.Error(), true, false) + if h.healthTracker != nil { + h.healthTracker.Record(route.Provider.Name(), latency, err) + } writeErrorRaw(w, pe.StatusCode, pe.Body) return } lastErr = err + latency := time.Since(start).Milliseconds() log.Printf("Provider %s stream failed for %s: %v", route.Provider.Name(), req.Model, err) - h.logRequest(tokenName, req.Model, route.Provider.Name(), route.ProviderModel, 0, 0, 0, time.Since(start).Milliseconds(), "error", err.Error(), true, false) + h.logRequest(tokenName, req.Model, route.Provider.Name(), route.ProviderModel, 0, 0, 0, latency, "error", err.Error(), true, false) + if h.healthTracker != nil { + h.healthTracker.Record(route.Provider.Name(), latency, err) + } continue } @@ -84,6 +91,9 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, req *prov cost := computeCost(inputTokens, outputTokens, route.InputPrice, route.OutputPrice) h.metrics.RecordRequest(req.Model, route.Provider.Name(), tokenName, "success", latency, inputTokens, outputTokens, cost) h.logRequest(tokenName, req.Model, route.Provider.Name(), route.ProviderModel, inputTokens, outputTokens, cost, latency, "success", "", true, false) + if h.healthTracker != nil { + h.healthTracker.Record(route.Provider.Name(), latency, nil) + } return } diff --git a/llm-gateway/internal/storage/db.go b/llm-gateway/internal/storage/db.go index 60f0396..60d705a 100644 --- a/llm-gateway/internal/storage/db.go +++ b/llm-gateway/internal/storage/db.go @@ -101,3 +101,27 @@ func (db *DB) TodaySpend(tokenName string) (float64, error) { } return total.Float64, nil } + +// TodaySpendAll returns today's spend for all tokens as a map. +func (db *DB) TodaySpendAll() (map[string]float64, error) { + startOfDay := time.Now().Truncate(24 * time.Hour).Unix() + rows, err := db.Query( + "SELECT token_name, SUM(cost_usd) FROM request_logs WHERE timestamp >= ? GROUP BY token_name", + startOfDay, + ) + if err != nil { + return nil, err + } + defer rows.Close() + + result := make(map[string]float64) + for rows.Next() { + var name string + var total float64 + if err := rows.Scan(&name, &total); err != nil { + continue + } + result[name] = total + } + return result, nil +}
NamePrefixRate LimitBudgetCreatedLast Used
NamePrefixRate LimitBudgetToday's SpendCreatedLast Used
{{.Name}} {{.KeyPrefix}}... {{if eq .RateLimitRPM 0}}unlimited{{else}}{{.RateLimitRPM}} rpm{{end}} {{if gt .DailyBudgetUSD 0.0}}${{printf "%.2f" .DailyBudgetUSD}}{{else}}unlimited{{end}} + {{$spend := index $.TokenSpend .Name}} + {{if gt .DailyBudgetUSD 0.0}} + {{$pct := budgetPct $spend .DailyBudgetUSD}} +
+
+
${{printf "%.4f" $spend}} / ${{printf "%.2f" .DailyBudgetUSD}} ({{printf "%.1f" $pct}}%)
+
+ {{else}} + {{if gt $spend 0.0}}{{formatCost $spend}}{{else}}-{{end}} + {{end}} +
{{formatTime .CreatedAt}} {{if gt .LastUsedAt 0}}{{formatTime .LastUsedAt}}{{else}}never{{end}}