ai-servers/llm-gateway/internal/metrics/prometheus.go

// Package metrics exposes Prometheus instrumentation for the LLM gateway.
package metrics

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

// Metrics bundles the gateway's Prometheus collectors.
type Metrics struct {
	requestsTotal   *prometheus.CounterVec
	requestDuration *prometheus.HistogramVec
	tokensTotal     *prometheus.CounterVec
	costTotal       *prometheus.CounterVec
}

// New creates the collectors and registers them with the default
// Prometheus registry via promauto.
func New() *Metrics {
	return &Metrics{
		requestsTotal: promauto.NewCounterVec(prometheus.CounterOpts{
			Name: "llm_gateway_requests_total",
			Help: "Total number of LLM requests",
		}, []string{"model", "provider", "token_name", "status"}),
		requestDuration: promauto.NewHistogramVec(prometheus.HistogramOpts{
			Name:    "llm_gateway_request_duration_ms",
			Help:    "Request duration in milliseconds",
			Buckets: []float64{100, 250, 500, 1000, 2500, 5000, 10000, 30000, 60000, 120000},
		}, []string{"model", "provider"}),
		tokensTotal: promauto.NewCounterVec(prometheus.CounterOpts{
			Name: "llm_gateway_tokens_total",
			Help: "Total tokens processed",
		}, []string{"model", "provider", "type"}),
		costTotal: promauto.NewCounterVec(prometheus.CounterOpts{
			Name: "llm_gateway_cost_usd_total",
			Help: "Total cost in USD",
		}, []string{"model", "provider", "token_name"}),
	}
}
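// For orientation, a single recorded request surfaces in the /metrics
// exposition roughly as follows; the label values here are illustrative,
// not taken from this repository (the histogram also emits _sum and
// _count series, omitted for brevity):
//
//	llm_gateway_requests_total{model="gpt-4o",provider="openai",token_name="team-alpha",status="success"} 1
//	llm_gateway_request_duration_ms_bucket{model="gpt-4o",provider="openai",le="2500"} 1
//	llm_gateway_tokens_total{model="gpt-4o",provider="openai",type="input"} 412
//	llm_gateway_cost_usd_total{model="gpt-4o",provider="openai",token_name="team-alpha"} 0.0137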
// RecordRequest records one completed request: it increments the request
// counter, observes the end-to-end latency, and adds token and cost usage.
// Token and cost counters are only touched when the values are positive,
// so requests with no usage data do not create empty label series.
func (m *Metrics) RecordRequest(model, providerName, tokenName, status string, latencyMS int64, inputTokens, outputTokens int, cost float64) {
	m.requestsTotal.WithLabelValues(model, providerName, tokenName, status).Inc()
	m.requestDuration.WithLabelValues(model, providerName).Observe(float64(latencyMS))
	if inputTokens > 0 {
		m.tokensTotal.WithLabelValues(model, providerName, "input").Add(float64(inputTokens))
	}
	if outputTokens > 0 {
		m.tokensTotal.WithLabelValues(model, providerName, "output").Add(float64(outputTokens))
	}
	if cost > 0 {
		m.costTotal.WithLabelValues(model, providerName, tokenName).Add(cost)
	}
}
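
// Minimal usage sketch. The HTTP wiring and the provider call below are
// assumptions for illustration, not part of this file; promhttp.Handler()
// is the standard exposition handler from client_golang:
//
//	m := metrics.New()
//	http.Handle("/metrics", promhttp.Handler())
//
//	start := time.Now()
//	resp, err := provider.Complete(ctx, req) // hypothetical provider client
//	status := "success"
//	if err != nil {
//		status = "error"
//	}
//	m.RecordRequest(req.Model, provider.Name(), tokenName, status,
//		time.Since(start).Milliseconds(),
//		resp.InputTokens, resp.OutputTokens, resp.CostUSD)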