121 lines
2.6 KiB
Go
121 lines
2.6 KiB
Go
package provider
|
|
|
|
import (
	"sort"
	"sync"
	"time"
)
|
|
|
|
// HealthEvent captures the outcome of one request made to a provider.
type HealthEvent struct {
	// Timestamp is the moment the outcome was recorded.
	Timestamp time.Time
	// LatencyMS is the request latency in milliseconds.
	LatencyMS int64
	// IsError reports whether the request failed.
	IsError bool
	// ErrorMsg carries the failure's message; it is empty for successes.
	ErrorMsg string
}
|
|
|
|
// ProviderHealth summarizes one provider's health as computed from its
// recent events. The struct tags define its JSON encoding.
type ProviderHealth struct {
	// Provider is the name of the provider this summary describes.
	Provider string `json:"provider"`
	// Status is one of "healthy", "degraded", or "down".
	Status string `json:"status"`
	// ErrorRate is Errors divided by Total.
	ErrorRate float64 `json:"error_rate"`
	// AvgLatency is the mean latency, in milliseconds, of the counted events.
	AvgLatency float64 `json:"avg_latency_ms"`
	// Total is the number of events counted.
	Total int `json:"total"`
	// Errors is how many of the counted events were failures.
	Errors int `json:"errors"`
}
|
|
|
|
// HealthTracker tracks per-provider health using a sliding window.
|
|
type HealthTracker struct {
|
|
mu sync.RWMutex
|
|
windows map[string][]HealthEvent
|
|
windowDu time.Duration
|
|
}
|
|
|
|
// NewHealthTracker creates a health tracker with the given window duration.
|
|
func NewHealthTracker(window time.Duration) *HealthTracker {
|
|
if window == 0 {
|
|
window = 5 * time.Minute
|
|
}
|
|
return &HealthTracker{
|
|
windows: make(map[string][]HealthEvent),
|
|
windowDu: window,
|
|
}
|
|
}
|
|
|
|
// Record adds a health event for a provider.
|
|
func (h *HealthTracker) Record(provider string, latencyMS int64, err error) {
|
|
event := HealthEvent{
|
|
Timestamp: time.Now(),
|
|
LatencyMS: latencyMS,
|
|
IsError: err != nil,
|
|
}
|
|
if err != nil {
|
|
event.ErrorMsg = err.Error()
|
|
}
|
|
|
|
h.mu.Lock()
|
|
defer h.mu.Unlock()
|
|
|
|
h.windows[provider] = append(h.windows[provider], event)
|
|
h.prune(provider)
|
|
}
|
|
|
|
// Status returns computed health for all tracked providers.
|
|
func (h *HealthTracker) Status() []ProviderHealth {
|
|
h.mu.RLock()
|
|
defer h.mu.RUnlock()
|
|
|
|
cutoff := time.Now().Add(-h.windowDu)
|
|
var results []ProviderHealth
|
|
|
|
for provider, events := range h.windows {
|
|
var total, errors int
|
|
var totalLatency int64
|
|
|
|
for _, e := range events {
|
|
if e.Timestamp.Before(cutoff) {
|
|
continue
|
|
}
|
|
total++
|
|
totalLatency += e.LatencyMS
|
|
if e.IsError {
|
|
errors++
|
|
}
|
|
}
|
|
|
|
if total == 0 {
|
|
continue
|
|
}
|
|
|
|
errorRate := float64(errors) / float64(total)
|
|
status := "healthy"
|
|
if errorRate >= 0.5 {
|
|
status = "down"
|
|
} else if errorRate >= 0.1 {
|
|
status = "degraded"
|
|
}
|
|
|
|
results = append(results, ProviderHealth{
|
|
Provider: provider,
|
|
Status: status,
|
|
ErrorRate: errorRate,
|
|
AvgLatency: float64(totalLatency) / float64(total),
|
|
Total: total,
|
|
Errors: errors,
|
|
})
|
|
}
|
|
|
|
return results
|
|
}
|
|
|
|
// prune removes events outside the window. Must be called with lock held.
|
|
func (h *HealthTracker) prune(provider string) {
|
|
cutoff := time.Now().Add(-h.windowDu)
|
|
events := h.windows[provider]
|
|
i := 0
|
|
for i < len(events) && events[i].Timestamp.Before(cutoff) {
|
|
i++
|
|
}
|
|
if i > 0 {
|
|
h.windows[provider] = events[i:]
|
|
}
|
|
}
|