ai-servers/llm-gateway/internal/metrics/prometheus.go
Ray Andrew 90adf6f3a8
feat(gateway): add circuit breaker, retry, and concurrency limit support
feat(gateway): add debug logging with file storage and retention

feat(gateway): add audit logging for user actions

feat(gateway): add request ID tracking and rate limit headers

feat(gateway): add model aliases and load balancing strategies

feat(gateway): add config hot-reload via SIGHUP

feat(gateway): add CORS support

feat(gateway): add data export API and dashboard endpoints

feat(gateway): add dashboard pages for audit and debug logs

feat(gateway): add concurrent request limiting per token

feat(gateway): add streaming timeout support

feat(gateway): add migration support for new schema fields
2026-02-15 04:21:40 -06:00

73 lines
2.3 KiB
Go

package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// Metrics holds the Prometheus collectors for the LLM gateway.
// All collectors are registered with the default registry at construction
// time (see New), so a single Metrics value should be created per process.
type Metrics struct {
	requestsTotal   *prometheus.CounterVec   // requests by model/provider/token_name/status
	requestDuration *prometheus.HistogramVec // request latency in ms by model/provider
	tokensTotal     *prometheus.CounterVec   // tokens by model/provider and type (input/output)
	costTotal       *prometheus.CounterVec   // accumulated USD cost by model/provider/token_name
	cacheHits       prometheus.Counter       // response-cache hits
	cacheMisses     prometheus.Counter       // response-cache misses
}
// New builds a Metrics value and registers every collector with the default
// Prometheus registry via promauto. Because promauto panics on duplicate
// registration, New must be called at most once per process.
func New() *Metrics {
	m := &Metrics{}

	m.requestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "llm_gateway_requests_total",
		Help: "Total number of LLM requests",
	}, []string{"model", "provider", "token_name", "status"})

	// Buckets span 100ms up to 2 minutes to cover both fast cache hits and
	// long streaming completions.
	m.requestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "llm_gateway_request_duration_ms",
		Help:    "Request duration in milliseconds",
		Buckets: []float64{100, 250, 500, 1000, 2500, 5000, 10000, 30000, 60000, 120000},
	}, []string{"model", "provider"})

	m.tokensTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "llm_gateway_tokens_total",
		Help: "Total tokens processed",
	}, []string{"model", "provider", "type"})

	m.costTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "llm_gateway_cost_usd_total",
		Help: "Total cost in USD",
	}, []string{"model", "provider", "token_name"})

	m.cacheHits = promauto.NewCounter(prometheus.CounterOpts{
		Name: "llm_gateway_cache_hits_total",
		Help: "Total number of cache hits",
	})

	m.cacheMisses = promauto.NewCounter(prometheus.CounterOpts{
		Name: "llm_gateway_cache_misses_total",
		Help: "Total number of cache misses",
	})

	return m
}
// RecordRequest records one completed LLM request: it increments the request
// counter, observes the latency histogram, and — when the corresponding value
// is positive — adds input/output token counts and USD cost.
func (m *Metrics) RecordRequest(model, providerName, tokenName, status string, latencyMS int64, inputTokens, outputTokens int, cost float64) {
	// Always count the request and its latency.
	m.requestsTotal.WithLabelValues(model, providerName, tokenName, status).Inc()
	m.requestDuration.WithLabelValues(model, providerName).Observe(float64(latencyMS))

	// Token usage may be absent (e.g. errored requests); only positive
	// counts are recorded so the series stay meaningful.
	tokenCounts := []struct {
		kind  string
		count int
	}{
		{"input", inputTokens},
		{"output", outputTokens},
	}
	for _, tc := range tokenCounts {
		if tc.count > 0 {
			m.tokensTotal.WithLabelValues(model, providerName, tc.kind).Add(float64(tc.count))
		}
	}

	// Cost is likewise optional; skip zero/negative values.
	if cost > 0 {
		m.costTotal.WithLabelValues(model, providerName, tokenName).Add(cost)
	}
}
// RecordCacheHit increments the response-cache hit counter.
func (m *Metrics) RecordCacheHit() {
	m.cacheHits.Inc()
}
// RecordCacheMiss increments the response-cache miss counter.
func (m *Metrics) RecordCacheMiss() {
	m.cacheMisses.Inc()
}