ai-servers/llm-gateway/internal/provider/health_test.go
Ray Andrew 90adf6f3a8
feat(gateway): add circuit breaker, retry, and concurrency limit support
feat(gateway): add debug logging with file storage and retention

feat(gateway): add audit logging for user actions

feat(gateway): add request ID tracking and rate limit headers

feat(gateway): add model aliases and load balancing strategies

feat(gateway): add config hot-reload via SIGHUP

feat(gateway): add CORS support

feat(gateway): add data export API and dashboard endpoints

feat(gateway): add dashboard pages for audit and debug logs

feat(gateway): add concurrent request limiting per token

feat(gateway): add streaming timeout support

feat(gateway): add migration support for new schema fields
2026-02-15 04:21:40 -06:00

345 lines
8.3 KiB
Go

package provider
import (
"errors"
"testing"
"time"
"llm-gateway/internal/config"
)
// newTestTracker is the single construction point used by the tests in this
// file. It forwards the sliding-window length and the circuit-breaker settings
// to NewHealthTracker unchanged and hands back the resulting tracker.
func newTestTracker(window time.Duration, cb config.CircuitBreakerConfig) *HealthTracker {
	tracker := NewHealthTracker(window, cb)
	return tracker
}
// defaultCBConfig returns the baseline circuit-breaker settings shared by the
// circuit-breaker tests: breaker enabled, 0.5 error threshold, a minimum of
// 3 requests, and a 100ms cooldown. Callers receive a fresh value and may
// override individual fields.
func defaultCBConfig() config.CircuitBreakerConfig {
	var cfg config.CircuitBreakerConfig
	cfg.Enabled = true
	cfg.ErrorThreshold = 0.5
	cfg.MinRequests = 3
	cfg.CooldownDuration = 100 * time.Millisecond
	return cfg
}
// TestHealthTracker_Record checks that Record stores one event per call under
// the provider's name and that a failed call is captured with its error flag,
// error message and latency.
func TestHealthTracker_Record(t *testing.T) {
	tracker := newTestTracker(5*time.Minute, config.CircuitBreakerConfig{})

	tracker.Record("provA", 100, nil)
	tracker.Record("provA", 200, errors.New("fail"))
	tracker.Record("provB", 50, nil)

	// The per-provider windows are inspected directly, so hold the read lock
	// for the remainder of the test.
	tracker.mu.RLock()
	defer tracker.mu.RUnlock()

	if got := len(tracker.windows["provA"]); got != 2 {
		t.Fatalf("expected 2 events for provA, got %d", got)
	}
	if got := len(tracker.windows["provB"]); got != 1 {
		t.Fatalf("expected 1 event for provB, got %d", got)
	}

	// The second provA event is the failing call recorded above.
	failed := tracker.windows["provA"][1]
	switch {
	case !failed.IsError, failed.ErrorMsg != "fail", failed.LatencyMS != 200:
		t.Fatalf("unexpected event fields: %+v", failed)
	}
}
// TestHealthTracker_Status is a table-driven check of the figures returned by
// Status for a single provider. Each case records a number of successful and
// failed requests on a fresh tracker (circuit breaker left at its zero-value
// config) and compares the reported status label, totals and error rate with
// the expected values.
func TestHealthTracker_Status(t *testing.T) {
	type statusCase struct {
		name          string
		successCount  int
		errorCount    int
		wantStatus    string
		wantErrorRate float64
		wantTotal     int
		wantErrors    int
	}

	// Error rates are floating point, so they are compared within this
	// absolute tolerance instead of with ==.
	const tolerance = 0.001

	cases := []statusCase{
		{name: "healthy - no errors", successCount: 10, errorCount: 0, wantStatus: "healthy", wantErrorRate: 0.0, wantTotal: 10, wantErrors: 0},
		{name: "healthy - below 10% errors", successCount: 19, errorCount: 1, wantStatus: "healthy", wantErrorRate: 0.05, wantTotal: 20, wantErrors: 1},
		{name: "degraded - 20% errors", successCount: 8, errorCount: 2, wantStatus: "degraded", wantErrorRate: 0.2, wantTotal: 10, wantErrors: 2},
		{name: "degraded - exactly 10% errors", successCount: 9, errorCount: 1, wantStatus: "degraded", wantErrorRate: 0.1, wantTotal: 10, wantErrors: 1},
		{name: "down - 50% errors", successCount: 5, errorCount: 5, wantStatus: "down", wantErrorRate: 0.5, wantTotal: 10, wantErrors: 5},
		{name: "down - all errors", successCount: 0, errorCount: 5, wantStatus: "down", wantErrorRate: 1.0, wantTotal: 5, wantErrors: 5},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			tracker := newTestTracker(5*time.Minute, config.CircuitBreakerConfig{})

			// Successes are recorded first, followed by the failures.
			for remaining := tc.successCount; remaining > 0; remaining-- {
				tracker.Record("prov", 100, nil)
			}
			for remaining := tc.errorCount; remaining > 0; remaining-- {
				tracker.Record("prov", 100, errors.New("err"))
			}

			report := tracker.Status()
			if n := len(report); n != 1 {
				t.Fatalf("expected 1 status, got %d", n)
			}

			got := report[0]
			if got.Status != tc.wantStatus {
				t.Errorf("status = %q, want %q", got.Status, tc.wantStatus)
			}
			if got.Total != tc.wantTotal {
				t.Errorf("total = %d, want %d", got.Total, tc.wantTotal)
			}
			if got.Errors != tc.wantErrors {
				t.Errorf("errors = %d, want %d", got.Errors, tc.wantErrors)
			}
			if delta := got.ErrorRate - tc.wantErrorRate; delta > tolerance || delta < -tolerance {
				t.Errorf("error_rate = %f, want %f", got.ErrorRate, tc.wantErrorRate)
			}
		})
	}
}
// TestHealthTracker_CircuitBreaker_ClosedToOpen records enough consecutive
// failures to exceed the configured error threshold and expects the provider's
// circuit to be in CircuitOpen, with IsAvailable reporting false.
func TestHealthTracker_CircuitBreaker_ClosedToOpen(t *testing.T) {
	cb := defaultCBConfig()
	cb.MinRequests = 3
	cb.ErrorThreshold = 0.5
	ht := newTestTracker(5*time.Minute, cb)

	// Record errors to exceed threshold (3 errors out of 3 = 100% > 50%)
	for i := 0; i < 3; i++ {
		ht.Record("prov", 100, errors.New("err"))
	}

	// Look the circuit up with the comma-ok form so that a missing entry is
	// reported as an ordinary test failure rather than risking a nil
	// dereference while the read lock is held.
	ht.mu.RLock()
	circuit, ok := ht.circuits["prov"]
	if !ok {
		ht.mu.RUnlock()
		t.Fatal("expected a circuit entry for prov after recording errors, found none")
	}
	state := circuit.State
	ht.mu.RUnlock()

	if state != CircuitOpen {
		t.Fatalf("expected CircuitOpen, got %s", state)
	}
	if ht.IsAvailable("prov") {
		t.Fatal("expected IsAvailable=false when circuit is open")
	}
}
// TestHealthTracker_CircuitBreaker_OpenToHalfOpenOnCooldown trips the circuit
// with a 50ms cooldown, confirms the provider is reported unavailable, then
// sleeps past the cooldown and expects IsAvailable to report true again.
func TestHealthTracker_CircuitBreaker_OpenToHalfOpenOnCooldown(t *testing.T) {
	cfg := defaultCBConfig()
	cfg.CooldownDuration = 50 * time.Millisecond
	tracker := newTestTracker(5*time.Minute, cfg)

	// Five straight failures trip the breaker.
	for remaining := 5; remaining > 0; remaining-- {
		tracker.Record("prov", 100, errors.New("err"))
	}

	if available := tracker.IsAvailable("prov"); available {
		t.Fatal("expected circuit open, IsAvailable should be false")
	}

	// Sleep slightly longer than the cooldown so it has definitely elapsed.
	time.Sleep(60 * time.Millisecond)

	// Once the cooldown is over the provider is expected to be offered again
	// (the half-open probe phase).
	if available := tracker.IsAvailable("prov"); !available {
		t.Fatal("expected IsAvailable=true after cooldown")
	}
}
// TestHealthTracker_CircuitBreaker_HalfOpenToClosedOnSuccess trips the circuit,
// waits out a 10ms cooldown, records one successful request and expects the
// circuit to end up in CircuitClosed with the provider available.
func TestHealthTracker_CircuitBreaker_HalfOpenToClosedOnSuccess(t *testing.T) {
	cfg := defaultCBConfig()
	cfg.CooldownDuration = 10 * time.Millisecond
	tracker := newTestTracker(5*time.Minute, cfg)

	// Five straight failures trip the breaker.
	for remaining := 5; remaining > 0; remaining-- {
		tracker.Record("prov", 100, errors.New("err"))
	}

	// Let the cooldown lapse so the next Record is handled as the
	// post-cooldown probe (Open -> HalfOpen).
	time.Sleep(20 * time.Millisecond)

	// A success at this point is expected to close the circuit.
	tracker.Record("prov", 100, nil)

	tracker.mu.RLock()
	got := tracker.circuits["prov"].State
	tracker.mu.RUnlock()

	if got != CircuitClosed {
		t.Fatalf("expected CircuitClosed after success in half-open, got %s", got)
	}
	if available := tracker.IsAvailable("prov"); !available {
		t.Fatal("expected IsAvailable=true after circuit closed")
	}
}
// TestHealthTracker_CircuitBreaker_HalfOpenToOpenOnFailure trips the circuit,
// waits out a 10ms cooldown, records another failure and expects the circuit
// to be in CircuitOpen afterwards.
func TestHealthTracker_CircuitBreaker_HalfOpenToOpenOnFailure(t *testing.T) {
	cb := defaultCBConfig()
	cb.CooldownDuration = 10 * time.Millisecond
	ht := newTestTracker(5*time.Minute, cb)

	// Trip the circuit
	for i := 0; i < 5; i++ {
		ht.Record("prov", 100, errors.New("err"))
	}

	// Wait for cooldown
	time.Sleep(20 * time.Millisecond)

	// A failed record should transition: Open -> HalfOpen -> Open
	ht.Record("prov", 100, errors.New("still failing"))

	// Look the circuit up with the comma-ok form so that a missing entry is
	// reported as an ordinary test failure rather than risking a nil
	// dereference while the read lock is held.
	ht.mu.RLock()
	circuit, ok := ht.circuits["prov"]
	if !ok {
		ht.mu.RUnlock()
		t.Fatal("expected a circuit entry for prov after recording errors, found none")
	}
	state := circuit.State
	ht.mu.RUnlock()

	if state != CircuitOpen {
		t.Fatalf("expected CircuitOpen after failure in half-open, got %s", state)
	}
}
func TestHealthTracker_IsAvailable_NoCircuitBreaker(t *testing.T) {
ht := newTestTracker(5*time.Minute, config.CircuitBreakerConfig{Enabled: false})
// Even with errors, IsAvailable should return true when CB is disabled
for i := 0; i < 10; i++ {
ht.Record("prov", 100, errors.New("err"))
}
if !ht.IsAvailable("prov") {
t.Fatal("expected IsAvailable=true when circuit breaker disabled")
}
}
// TestHealthTracker_IsAvailable_UnknownProvider asks about a provider that has
// never been recorded and expects it to be reported as available.
func TestHealthTracker_IsAvailable_UnknownProvider(t *testing.T) {
	tracker := newTestTracker(5*time.Minute, defaultCBConfig())

	available := tracker.IsAvailable("unknown")
	if !available {
		t.Fatal("expected IsAvailable=true for unknown provider (no circuit)")
	}
}
// TestHealthTracker_WindowPruning uses a 50ms window, records two events, waits
// for them to age out and then records a third. Only the newest event is
// expected to remain in the provider's window.
func TestHealthTracker_WindowPruning(t *testing.T) {
	// A very short window lets the first two events expire within the test.
	tracker := newTestTracker(50*time.Millisecond, config.CircuitBreakerConfig{})

	tracker.Record("prov", 100, nil)
	tracker.Record("prov", 200, nil)

	// Sleep past the window so both events are stale.
	time.Sleep(60 * time.Millisecond)

	// The test relies on this Record call dropping the stale events.
	tracker.Record("prov", 300, nil)

	tracker.mu.RLock()
	remaining := len(tracker.windows["prov"])
	tracker.mu.RUnlock()

	if remaining != 1 {
		t.Fatalf("expected 1 event after pruning, got %d", remaining)
	}
}
// TestHealthTracker_Status_EmptyAfterPruning records a single event in a 50ms
// window, waits for it to expire and expects Status to return no entries.
func TestHealthTracker_Status_EmptyAfterPruning(t *testing.T) {
	tracker := newTestTracker(50*time.Millisecond, config.CircuitBreakerConfig{})
	tracker.Record("prov", 100, nil)

	// Sleep past the window so the only event is stale.
	time.Sleep(60 * time.Millisecond)

	if n := len(tracker.Status()); n != 0 {
		t.Fatalf("expected 0 statuses after window expiry, got %d", n)
	}
}
// TestHealthTracker_Status_AvgLatency records three successful requests with
// latencies 100, 200 and 300 and expects Status to report an average latency
// of 200 (within a small float tolerance).
func TestHealthTracker_Status_AvgLatency(t *testing.T) {
	const (
		want      = 200.0
		tolerance = 0.001
	)

	tracker := newTestTracker(5*time.Minute, config.CircuitBreakerConfig{})
	tracker.Record("prov", 100, nil)
	tracker.Record("prov", 200, nil)
	tracker.Record("prov", 300, nil)

	report := tracker.Status()
	if n := len(report); n != 1 {
		t.Fatalf("expected 1 status, got %d", n)
	}

	got := report[0].AvgLatency
	if delta := got - want; delta > tolerance || delta < -tolerance {
		t.Errorf("avg_latency = %f, want %f", got, want)
	}
}
func TestHealthTracker_Status_CircuitStateReported(t *testing.T) {
cb := defaultCBConfig()
ht := newTestTracker(5*time.Minute, cb)
// Trip the circuit
for i := 0; i < 5; i++ {
ht.Record("prov", 100, errors.New("err"))
}
statuses := ht.Status()
if len(statuses) != 1 {
t.Fatalf("expected 1 status, got %d", len(statuses))
}
if statuses[0].CircuitState != "open" {
t.Errorf("circuit_state = %q, want %q", statuses[0].CircuitState, "open")
}
}