ai-servers/llm-gateway/internal/proxy/concurrency.go
Ray Andrew 90adf6f3a8
feat(gateway): add circuit breaker, retry, and concurrency limit support
feat(gateway): add debug logging with file storage and retention

feat(gateway): add audit logging for user actions

feat(gateway): add request ID tracking and rate limit headers

feat(gateway): add model aliases and load balancing strategies

feat(gateway): add config hot-reload via SIGHUP

feat(gateway): add CORS support

feat(gateway): add data export API and dashboard endpoints

feat(gateway): add dashboard pages for audit and debug logs

feat(gateway): add concurrent request limiting per token

feat(gateway): add streaming timeout support

feat(gateway): add migration support for new schema fields
2026-02-15 04:21:40 -06:00

51 lines
1.1 KiB
Go

package proxy
import (
"net/http"
"sync"
"sync/atomic"
)
// ConcurrencyLimiter enforces per-token concurrent request limits.
//
// Each token name maps to an in-flight request counter; the counter for a
// token is created lazily on first use and is never removed, so the map
// grows with the set of distinct token names seen.
type ConcurrencyLimiter struct {
	// mu guards the counters map itself; the individual counters are
	// atomic and are read/updated without holding mu.
	mu sync.Mutex
	// counters maps a token name to its current in-flight request count.
	counters map[string]*atomic.Int64
}
// NewConcurrencyLimiter returns a ConcurrencyLimiter ready for use, with an
// empty per-token counter map.
func NewConcurrencyLimiter() *ConcurrencyLimiter {
	limiter := &ConcurrencyLimiter{}
	limiter.counters = map[string]*atomic.Int64{}
	return limiter
}
// getCounter returns the in-flight counter for tokenName, creating and
// registering a fresh one under the lock if none exists yet.
func (cl *ConcurrencyLimiter) getCounter(tokenName string) *atomic.Int64 {
	cl.mu.Lock()
	defer cl.mu.Unlock()
	if counter, exists := cl.counters[tokenName]; exists {
		return counter
	}
	counter := new(atomic.Int64)
	cl.counters[tokenName] = counter
	return counter
}
// Check wraps next with per-token concurrency enforcement. Requests without
// a token, or whose token has no positive MaxConcurrent, pass through
// unlimited. Otherwise the token's in-flight counter is incremented for the
// duration of the request (the deferred decrement also covers rejections),
// and requests that would exceed MaxConcurrent receive 429 Too Many Requests.
func (cl *ConcurrencyLimiter) Check(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		token := getAPIToken(r.Context())

		// No token on the request, or limiting disabled for this token.
		if token == nil || token.MaxConcurrent <= 0 {
			next.ServeHTTP(w, r)
			return
		}

		inFlight := cl.getCounter(token.Name)
		active := inFlight.Add(1)
		defer inFlight.Add(-1)

		if active > int64(token.MaxConcurrent) {
			writeError(w, http.StatusTooManyRequests, "concurrent request limit exceeded")
			return
		}
		next.ServeHTTP(w, r)
	})
}