package proxy

import (
	"fmt"
	"math"
	"net/http"
	"sync"
	"time"

	"llm-gateway/internal/storage"
)

// RateLimiter is HTTP middleware state that enforces a per-API-token request
// rate (token bucket, configured in requests per minute) and a per-token
// daily spend budget.
type RateLimiter struct {
	// db is queried via TodaySpend for the daily budget check.
	db *storage.DB

	// mu guards buckets and every field of the buckets it contains.
	mu sync.Mutex
	// buckets holds one token bucket per API token, keyed by token name.
	buckets map[string]*tokenBucket
}

// tokenBucket is the rate-limit state kept for a single API token.
type tokenBucket struct {
	// tokens is the number of requests currently available; may be fractional.
	tokens float64
	// maxTokens is the bucket capacity, i.e. the largest permitted burst.
	maxTokens float64
	// refillRate is the number of tokens added per second.
	refillRate float64
	// lastRefill is the instant up to which tokens has been refilled.
	lastRefill time.Time
}

// NewRateLimiter returns a RateLimiter with no buckets yet; buckets are
// created lazily the first time a token is rate-checked.
func NewRateLimiter(db *storage.DB) *RateLimiter {
	return &RateLimiter{
		db:      db,
		buckets: make(map[string]*tokenBucket),
	}
}

// Check wraps next with rate-limit and daily-budget enforcement.
//
// Requests for which getAPIToken returns nil are passed through untouched.
// When the token has RateLimitRPM > 0, the X-RateLimit-Limit,
// X-RateLimit-Remaining and X-RateLimit-Reset headers are set on the response
// and a request that exceeds the limit is rejected with 429 plus Retry-After.
// When the token has DailyBudgetUSD > 0 and the spend reported by TodaySpend
// has reached it, the request is rejected with 429.
func (rl *RateLimiter) Check(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		apiToken := getAPIToken(r.Context())
		if apiToken == nil {
			next.ServeHTTP(w, r)
			return
		}

		tokenName := apiToken.Name

		// Check rate limit.
		if apiToken.RateLimitRPM > 0 {
			allowed, remaining, resetAt := rl.allow(tokenName, apiToken.RateLimitRPM)

			// Set rate limit headers on all responses, allowed or not.
			hdr := w.Header()
			hdr.Set("X-RateLimit-Limit", fmt.Sprintf("%d", apiToken.RateLimitRPM))
			hdr.Set("X-RateLimit-Remaining", fmt.Sprintf("%d", remaining))
			hdr.Set("X-RateLimit-Reset", fmt.Sprintf("%d", resetAt))

			if !allowed {
				// resetAt is when the bucket is completely full again, so this
				// is a conservative (upper-bound) wait. Never advertise less
				// than one second.
				retryAfter := resetAt - time.Now().Unix()
				if retryAfter < 1 {
					retryAfter = 1
				}
				hdr.Set("Retry-After", fmt.Sprintf("%d", retryAfter))
				writeError(w, http.StatusTooManyRequests, "rate limit exceeded")
				return
			}
		}

		// Check daily budget. A TodaySpend error is deliberately not treated
		// as a rejection: the check fails open and the request proceeds.
		if apiToken.DailyBudgetUSD > 0 {
			spent, err := rl.db.TodaySpend(tokenName)
			if err == nil && spent >= apiToken.DailyBudgetUSD {
				writeError(w, http.StatusTooManyRequests, "daily budget exceeded")
				return
			}
		}

		next.ServeHTTP(w, r)
	})
}

// allow refills the bucket for tokenName and tries to consume one token.
//
// It returns whether the request is allowed, the whole number of tokens left
// after this request, and the Unix time (seconds) at which the bucket will be
// full again. Both values describe the bucket state after the request has
// been accounted for.
//
// The bucket's capacity and refill rate follow rateLimitRPM on every call, so
// a changed limit takes effect immediately instead of being frozen at the
// value seen when the bucket was first created.
func (rl *RateLimiter) allow(tokenName string, rateLimitRPM int) (bool, int, int64) {
	rl.mu.Lock()
	defer rl.mu.Unlock()

	now := time.Now()
	limit := float64(rateLimitRPM)

	bucket, ok := rl.buckets[tokenName]
	switch {
	case !ok:
		// New buckets start full.
		bucket = &tokenBucket{
			tokens:     limit,
			maxTokens:  limit,
			refillRate: limit / 60.0,
			lastRefill: now,
		}
		rl.buckets[tokenName] = bucket
	case bucket.maxTokens != limit:
		// The configured limit changed since the bucket was created; the
		// clamp below trims any excess tokens to the new capacity.
		bucket.maxTokens = limit
		bucket.refillRate = limit / 60.0
	}

	// Refill for the time elapsed since the last call, capped at capacity.
	elapsed := now.Sub(bucket.lastRefill).Seconds()
	bucket.tokens += elapsed * bucket.refillRate
	if bucket.tokens > bucket.maxTokens {
		bucket.tokens = bucket.maxTokens
	}
	bucket.lastRefill = now

	allowed := bucket.tokens >= 1
	if allowed {
		bucket.tokens--
	}

	remaining := int(math.Floor(bucket.tokens))
	if remaining < 0 {
		remaining = 0
	}

	// Reset time: when the bucket would be full again. Scale to nanoseconds
	// before converting to time.Duration; converting the float seconds value
	// first would truncate any fractional second to zero.
	resetAt := now.Unix()
	if deficit := bucket.maxTokens - bucket.tokens; deficit > 0 && bucket.refillRate > 0 {
		wait := time.Duration(deficit / bucket.refillRate * float64(time.Second))
		resetAt = now.Add(wait).Unix()
	}

	return allowed, remaining, resetAt
}