ai-servers/llm-gateway/internal/proxy/ratelimit.go
Ray Andrew 90adf6f3a8
feat(gateway): add circuit breaker, retry, and concurrency limit support
feat(gateway): add debug logging with file storage and retention

feat(gateway): add audit logging for user actions

feat(gateway): add request ID tracking and rate limit headers

feat(gateway): add model aliases and load balancing strategies

feat(gateway): add config hot-reload via SIGHUP

feat(gateway): add CORS support

feat(gateway): add data export API and dashboard endpoints

feat(gateway): add dashboard pages for audit and debug logs

feat(gateway): add concurrent request limiting per token

feat(gateway): add streaming timeout support

feat(gateway): add migration support for new schema fields
2026-02-15 04:21:40 -06:00

122 lines
2.8 KiB
Go

package proxy
import (
"fmt"
"math"
"net/http"
"sync"
"time"
"llm-gateway/internal/storage"
)
// RateLimiter enforces per-API-token request-rate limits using an
// in-memory token bucket per token name, plus a daily spend budget
// check backed by the database.
type RateLimiter struct {
	db      *storage.DB             // queried for today's spend in budget checks
	mu      sync.Mutex              // guards buckets
	buckets map[string]*tokenBucket // per-token bucket state, keyed by token name; entries are never evicted
}
// tokenBucket holds the refill state for one API token's rate limit.
type tokenBucket struct {
	tokens     float64   // tokens currently available (may be fractional)
	maxTokens  float64   // bucket capacity (set from the configured RPM)
	refillRate float64   // tokens per second
	lastRefill time.Time // last time tokens were topped up
}
// NewRateLimiter returns a RateLimiter backed by db. Buckets are
// created lazily, on the first rate-limited request per token name.
func NewRateLimiter(db *storage.DB) *RateLimiter {
	rl := &RateLimiter{db: db}
	rl.buckets = map[string]*tokenBucket{}
	return rl
}
// Check is HTTP middleware that enforces the token's per-minute rate
// limit and daily spend budget before handing the request to next.
// Requests without an API token in the context pass through untouched.
func (rl *RateLimiter) Check(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		apiToken := getAPIToken(r.Context())
		if apiToken == nil {
			// No token in context: nothing to enforce.
			next.ServeHTTP(w, r)
			return
		}
		name := apiToken.Name

		if rpm := apiToken.RateLimitRPM; rpm > 0 {
			allowed, remaining, resetAt := rl.allow(name, rpm)
			// Advertise limit state on every response, allowed or not.
			h := w.Header()
			h.Set("X-RateLimit-Limit", fmt.Sprintf("%d", rpm))
			h.Set("X-RateLimit-Remaining", fmt.Sprintf("%d", remaining))
			h.Set("X-RateLimit-Reset", fmt.Sprintf("%d", resetAt))
			if !allowed {
				retryAfter := resetAt - time.Now().Unix()
				if retryAfter < 1 {
					retryAfter = 1
				}
				h.Set("Retry-After", fmt.Sprintf("%d", retryAfter))
				writeError(w, http.StatusTooManyRequests, "rate limit exceeded")
				return
			}
		}

		if budget := apiToken.DailyBudgetUSD; budget > 0 {
			// Fail open on DB errors: only reject when we positively
			// know today's spend meets or exceeds the budget.
			if spent, err := rl.db.TodaySpend(name); err == nil && spent >= budget {
				writeError(w, http.StatusTooManyRequests, "daily budget exceeded")
				return
			}
		}

		next.ServeHTTP(w, r)
	})
}
// allow applies a token-bucket check for tokenName at rateLimitRPM
// requests per minute. It returns whether this request is allowed,
// the number of whole tokens remaining after the check, and the Unix
// time at which the bucket would be completely full again.
func (rl *RateLimiter) allow(tokenName string, rateLimitRPM int) (bool, int, int64) {
	rl.mu.Lock()
	defer rl.mu.Unlock()

	limit := float64(rateLimitRPM)
	bucket, ok := rl.buckets[tokenName]
	if !ok {
		// First request for this token: start with a full bucket.
		// TODO(review): entries are never evicted, so this map grows
		// without bound as distinct token names appear.
		bucket = &tokenBucket{
			tokens:     limit,
			maxTokens:  limit,
			refillRate: limit / 60.0,
			lastRefill: time.Now(),
		}
		rl.buckets[tokenName] = bucket
	} else if bucket.maxTokens != limit {
		// The token's configured RPM changed (the gateway supports
		// config hot-reload): resize the bucket instead of keeping
		// the stale capacity and refill rate forever.
		bucket.maxTokens = limit
		bucket.refillRate = limit / 60.0
		if bucket.tokens > limit {
			bucket.tokens = limit
		}
	}

	// Refill based on elapsed time, capped at capacity.
	now := time.Now()
	elapsed := now.Sub(bucket.lastRefill).Seconds()
	bucket.tokens += elapsed * bucket.refillRate
	if bucket.tokens > bucket.maxTokens {
		bucket.tokens = bucket.maxTokens
	}
	bucket.lastRefill = now

	// Reset time: when the bucket would be full again. Round the
	// refill delay UP so clients are never told to retry before the
	// bucket has actually refilled. (The previous code converted the
	// float delay via time.Duration(deficit/refillRate)*time.Second,
	// which truncated toward zero and under-reported the wait.)
	resetAt := now.Unix()
	if deficit := bucket.maxTokens - bucket.tokens; deficit > 0 && bucket.refillRate > 0 {
		resetAt = now.Unix() + int64(math.Ceil(deficit/bucket.refillRate))
	}

	if bucket.tokens < 1 {
		// Not enough budget for one request; deny without consuming.
		return false, 0, resetAt
	}
	bucket.tokens--
	remaining := int(math.Floor(bucket.tokens))
	if remaining < 0 {
		remaining = 0
	}
	return true, remaining, resetAt
}