feat(coderd): add overload protection with rate limiting and concurrency control (#21161)

## Summary

This adds configurable overload protection to the AI Bridge daemon to
prevent the server from being overwhelmed during periods of high load.

Partially addresses coder/internal#1153 (rate limits and concurrency
control; circuit breakers are deferred to a follow-up).

## New Configuration Options

| Option | Environment Variable | Description | Default |
|--------|---------------------|-------------|---------|
| `--aibridge-max-concurrency` | `CODER_AIBRIDGE_MAX_CONCURRENCY` |
Maximum number of concurrent AI Bridge requests. Set to 0 to disable
(unlimited). | `0` |
| `--aibridge-rate-limit` | `CODER_AIBRIDGE_RATE_LIMIT` | Maximum number
of AI Bridge requests per second. Set to 0 to disable rate limiting. |
`0` |

## Behavior

When limits are exceeded:
- **Concurrency limit**: Returns HTTP `503 Service Unavailable` with
message "AI Bridge is currently at capacity. Please try again later."
- **Rate limit**: Returns HTTP `429 Too Many Requests` with
`Retry-After` header.

Both protections are optional and disabled by default (0 values).

## Implementation

The overload protection is implemented as reusable middleware in
`coderd/httpmw/ratelimit.go`:

1. **`RateLimitByAuthToken`**: Per-user rate limiting that uses
`APITokenFromRequest` to extract the authentication token, with fallback
to `X-Api-Key` header for AI provider compatibility (e.g., Anthropic).
Falls back to IP-based rate limiting if no token is present. Includes
`Retry-After` header for backpressure signaling.
2. **`ConcurrencyLimit`**: Uses an atomic counter to track in-flight
requests and reject when at capacity.

The middleware is applied in `enterprise/coderd/aibridge.go` via
`r.Group` in the following order:
1. Concurrency check (faster rejection for load shedding)
2. Rate limit check

**Note**: Rate limiting currently applies to all AI Bridge requests,
including pass-through requests. Ideally only actual interceptions
should count, but this would require changes in the aibridge library.

## Testing

Added comprehensive tests for:
- Rate limiting by auth token (Bearer token, X-Api-Key, no token
fallback to IP)
- Different tokens not rate limited against each other
- Disabled when limit is zero
- Retry-After header is set on 429 responses
- Concurrency limiting (allows within limit, rejects over limit,
disabled when zero)
This commit is contained in:
Kacper Sawicki
2025-12-11 16:38:54 +01:00
committed by GitHub
parent 8ead6f795d
commit 6f86f67754
17 changed files with 568 additions and 37 deletions
+8
View File
@@ -125,12 +125,20 @@ AI BRIDGE OPTIONS:
requests (requires the "oauth2" and "mcp-server-http" experiments to
be enabled).
--aibridge-max-concurrency int, $CODER_AIBRIDGE_MAX_CONCURRENCY (default: 0)
Maximum number of concurrent AI Bridge requests per replica. Set to 0
to disable (unlimited).
--aibridge-openai-base-url string, $CODER_AIBRIDGE_OPENAI_BASE_URL (default: https://api.openai.com/v1/)
The base URL of the OpenAI API.
--aibridge-openai-key string, $CODER_AIBRIDGE_OPENAI_KEY
The key to authenticate against the OpenAI API.
--aibridge-rate-limit int, $CODER_AIBRIDGE_RATE_LIMIT (default: 0)
Maximum number of AI Bridge requests per second per replica. Set to 0
to disable (unlimited).
CLIENT OPTIONS:
These options change the behavior of how clients interact with the Coder.
Clients include the Coder CLI, Coder Desktop, IDE extensions, and the web UI.
+8
View File
@@ -748,6 +748,14 @@ aibridge:
# (token, prompt, tool use).
# (default: 60d, type: duration)
retention: 1440h0m0s
# Maximum number of concurrent AI Bridge requests per replica. Set to 0 to disable
# (unlimited).
# (default: 0, type: int)
maxConcurrency: 0
# Maximum number of AI Bridge requests per second per replica. Set to 0 to disable
# (unlimited).
# (default: 0, type: int)
rateLimit: 0
# Configure data retention policies for various database tables. Retention
# policies automatically purge old data to reduce database size and improve
# performance. Setting a retention duration to 0 disables automatic purging for
+24
View File
@@ -0,0 +1,24 @@
// Package aibridge provides utilities for the AI Bridge feature.
package aibridge
import (
"net/http"
"strings"
)
// ExtractAuthToken extracts an authorization token from HTTP headers.
// It checks the Authorization header (Bearer token) and X-Api-Key header,
// which represent the different ways clients authenticate against AI providers.
// If neither are present, an empty string is returned.
func ExtractAuthToken(header http.Header) string {
if auth := strings.TrimSpace(header.Get("Authorization")); auth != "" {
fields := strings.Fields(auth)
if len(fields) == 2 && strings.EqualFold(fields[0], "Bearer") {
return fields[1]
}
}
if apiKey := strings.TrimSpace(header.Get("X-Api-Key")); apiKey != "" {
return apiKey
}
return ""
}
+6
View File
@@ -11883,9 +11883,15 @@ const docTemplate = `{
"inject_coder_mcp_tools": {
"type": "boolean"
},
"max_concurrency": {
"type": "integer"
},
"openai": {
"$ref": "#/definitions/codersdk.AIBridgeOpenAIConfig"
},
"rate_limit": {
"type": "integer"
},
"retention": {
"type": "integer"
}
+6
View File
@@ -10549,9 +10549,15 @@
"inject_coder_mcp_tools": {
"type": "boolean"
},
"max_concurrency": {
"type": "integer"
},
"openai": {
"$ref": "#/definitions/codersdk.AIBridgeOpenAIConfig"
},
"rate_limit": {
"type": "integer"
},
"retention": {
"type": "integer"
}
+71
View File
@@ -4,11 +4,13 @@ import (
"fmt"
"net/http"
"strconv"
"sync/atomic"
"time"
"github.com/go-chi/httprate"
"golang.org/x/xerrors"
"github.com/coder/coder/v2/coderd/aibridge"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/coderd/rbac"
@@ -70,3 +72,72 @@ func RateLimit(count int, window time.Duration) func(http.Handler) http.Handler
}),
)
}
// RateLimitByAuthToken returns a handler that limits requests based on the
// authentication token in the request.
//
// This differs from [RateLimit] in several ways:
// - It extracts the token directly from request headers (Authorization Bearer
// or X-Api-Key) rather than from the request context, making it suitable for
// endpoints that handle authentication internally (like AI Bridge) rather than
// via [ExtractAPIKeyMW] middleware.
// - It does not support the bypass header for Owners.
// - It does not key by endpoint, so the limit applies across all endpoints using
// this middleware.
// - It includes a Retry-After header in 429 responses for backpressure signaling.
//
// If no token is found in the headers, it falls back to rate limiting by IP address.
func RateLimitByAuthToken(count int, window time.Duration) func(http.Handler) http.Handler {
if count <= 0 {
return func(handler http.Handler) http.Handler {
return handler
}
}
return httprate.Limit(
count,
window,
httprate.WithKeyFuncs(func(r *http.Request) (string, error) {
// Try to extract auth token for per-user rate limiting using
// AI provider authentication headers (Authorization Bearer or X-Api-Key).
if token := aibridge.ExtractAuthToken(r.Header); token != "" {
return token, nil
}
// Fall back to IP-based rate limiting if no token present.
return httprate.KeyByIP(r)
}),
httprate.WithLimitHandler(func(w http.ResponseWriter, r *http.Request) {
// Add Retry-After header for backpressure signaling.
w.Header().Set("Retry-After", fmt.Sprintf("%d", int(window.Seconds())))
httpapi.Write(r.Context(), w, http.StatusTooManyRequests, codersdk.Response{
Message: "You've been rate limited. Please try again later.",
})
}),
)
}
// ConcurrencyLimit returns a handler that limits the number of concurrent
// requests. When the limit is exceeded, it returns HTTP 503 Service Unavailable.
func ConcurrencyLimit(maxConcurrent int64, resourceName string) func(http.Handler) http.Handler {
if maxConcurrent <= 0 {
return func(handler http.Handler) http.Handler {
return handler
}
}
var current atomic.Int64
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
c := current.Add(1)
defer current.Add(-1)
if c > maxConcurrent {
httpapi.Write(r.Context(), w, http.StatusServiceUnavailable, codersdk.Response{
Message: fmt.Sprintf("%s is currently at capacity. Please try again later.", resourceName),
})
return
}
next.ServeHTTP(w, r)
})
}
}
+210
View File
@@ -6,6 +6,7 @@ import (
"net"
"net/http"
"net/http/httptest"
"sync"
"testing"
"time"
@@ -17,6 +18,7 @@ import (
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/httpmw"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/testutil"
)
func randRemoteAddr() string {
@@ -145,3 +147,211 @@ func TestRateLimit(t *testing.T) {
}
})
}
func TestRateLimitByAuthToken(t *testing.T) {
t.Parallel()
t.Run("LimitsByAuthHeader", func(t *testing.T) {
t.Parallel()
tests := []struct {
name string
headerName string
headerVal string
}{
{
name: "BearerToken",
headerName: "Authorization",
headerVal: "Bearer test-token-123",
},
{
name: "XApiKey",
headerName: "X-Api-Key",
headerVal: "test-api-key-456",
},
{
name: "NoToken",
headerName: "",
headerVal: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
rtr := chi.NewRouter()
rtr.Use(httpmw.RateLimitByAuthToken(2, time.Hour))
rtr.Get("/", func(rw http.ResponseWriter, r *http.Request) {
rw.WriteHeader(http.StatusOK)
})
// Same token (or IP if no token) should be rate limited after 2 requests.
for i := 0; i < 5; i++ {
req := httptest.NewRequest("GET", "/", nil)
if tt.headerName != "" {
req.Header.Set(tt.headerName, tt.headerVal)
}
rec := httptest.NewRecorder()
rtr.ServeHTTP(rec, req)
resp := rec.Result()
_ = resp.Body.Close()
if i < 2 {
require.Equal(t, http.StatusOK, resp.StatusCode, "request %d should succeed", i)
} else {
require.Equal(t, http.StatusTooManyRequests, resp.StatusCode, "request %d should be rate limited", i)
// Verify Retry-After header is set.
require.NotEmpty(t, resp.Header.Get("Retry-After"), "Retry-After header should be set")
}
}
})
}
})
t.Run("DifferentTokensNotLimited", func(t *testing.T) {
t.Parallel()
rtr := chi.NewRouter()
rtr.Use(httpmw.RateLimitByAuthToken(1, time.Hour))
rtr.Get("/", func(rw http.ResponseWriter, r *http.Request) {
rw.WriteHeader(http.StatusOK)
})
// Different tokens should not be rate limited against each other.
for i := 0; i < 5; i++ {
req := httptest.NewRequest("GET", "/", nil)
req.Header.Set("Authorization", fmt.Sprintf("Bearer token-%d", i))
rec := httptest.NewRecorder()
rtr.ServeHTTP(rec, req)
resp := rec.Result()
_ = resp.Body.Close()
require.Equal(t, http.StatusOK, resp.StatusCode, "request %d should succeed", i)
}
})
t.Run("DisabledWhenZero", func(t *testing.T) {
t.Parallel()
rtr := chi.NewRouter()
rtr.Use(httpmw.RateLimitByAuthToken(0, time.Hour))
rtr.Get("/", func(rw http.ResponseWriter, r *http.Request) {
rw.WriteHeader(http.StatusOK)
})
// Should not be rate limited when limit is 0.
for i := 0; i < 10; i++ {
req := httptest.NewRequest("GET", "/", nil)
req.Header.Set("Authorization", "Bearer same-token")
rec := httptest.NewRecorder()
rtr.ServeHTTP(rec, req)
resp := rec.Result()
_ = resp.Body.Close()
require.Equal(t, http.StatusOK, resp.StatusCode)
}
})
}
func TestConcurrencyLimit(t *testing.T) {
t.Parallel()
t.Run("LimitsConcurrentRequests", func(t *testing.T) {
t.Parallel()
const maxConcurrency = 2
rtr := chi.NewRouter()
rtr.Use(httpmw.ConcurrencyLimit(maxConcurrency, "Test"))
// Use a WaitGroup as a barrier to ensure all requests are in the handler
// before any of them proceed.
var handlersReady sync.WaitGroup
handlersReady.Add(maxConcurrency)
releaseHandler := make(chan struct{})
rtr.Get("/", func(rw http.ResponseWriter, r *http.Request) {
handlersReady.Done()
// Wait until released.
<-releaseHandler
rw.WriteHeader(http.StatusOK)
})
server := httptest.NewServer(rtr)
defer server.Close()
ctx := testutil.Context(t, testutil.WaitShort)
// Start maxConcurrency requests that will block.
// We use channels to collect errors instead of require in goroutines.
type result struct {
statusCode int
err error
}
results := make(chan result, maxConcurrency)
var wg sync.WaitGroup
for i := 0; i < maxConcurrency; i++ {
wg.Add(1)
go func() {
defer wg.Done()
req, err := http.NewRequestWithContext(ctx, http.MethodGet, server.URL+"/", nil)
if err != nil {
results <- result{err: err}
return
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
results <- result{err: err}
return
}
defer resp.Body.Close()
results <- result{statusCode: resp.StatusCode}
}()
}
// Wait for all requests to enter the handler with a timeout.
handlersReadyCh := make(chan struct{})
go func() {
handlersReady.Wait()
close(handlersReadyCh)
}()
select {
case <-handlersReadyCh:
case <-ctx.Done():
t.Fatal("timed out waiting for handlers to be ready")
}
// Next request should be rejected since we're at capacity.
req, err := http.NewRequestWithContext(ctx, http.MethodGet, server.URL+"/", nil)
require.NoError(t, err)
resp, err := http.DefaultClient.Do(req)
require.NoError(t, err)
defer resp.Body.Close()
require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode)
// Release all blocked requests.
close(releaseHandler)
wg.Wait()
close(results)
// Check all goroutine results.
for res := range results {
require.NoError(t, res.err)
require.Equal(t, http.StatusOK, res.statusCode)
}
})
t.Run("DisabledWhenZero", func(t *testing.T) {
t.Parallel()
rtr := chi.NewRouter()
rtr.Use(httpmw.ConcurrencyLimit(0, "Test"))
rtr.Get("/", func(rw http.ResponseWriter, r *http.Request) {
rw.WriteHeader(http.StatusOK)
})
// Should not be limited when maxConcurrency is 0.
for i := 0; i < 10; i++ {
req := httptest.NewRequest("GET", "/", nil)
rec := httptest.NewRecorder()
rtr.ServeHTTP(rec, req)
resp := rec.Result()
_ = resp.Body.Close()
require.Equal(t, http.StatusOK, resp.StatusCode)
}
})
}
+22
View File
@@ -3401,6 +3401,26 @@ Write out the current server config as YAML to stdout.`,
YAML: "retention",
Annotations: serpent.Annotations{}.Mark(annotationFormatDuration, "true"),
},
{
Name: "AI Bridge Max Concurrency",
Description: "Maximum number of concurrent AI Bridge requests per replica. Set to 0 to disable (unlimited).",
Flag: "aibridge-max-concurrency",
Env: "CODER_AIBRIDGE_MAX_CONCURRENCY",
Value: &c.AI.BridgeConfig.MaxConcurrency,
Default: "0",
Group: &deploymentGroupAIBridge,
YAML: "maxConcurrency",
},
{
Name: "AI Bridge Rate Limit",
Description: "Maximum number of AI Bridge requests per second per replica. Set to 0 to disable (unlimited).",
Flag: "aibridge-rate-limit",
Env: "CODER_AIBRIDGE_RATE_LIMIT",
Value: &c.AI.BridgeConfig.RateLimit,
Default: "0",
Group: &deploymentGroupAIBridge,
YAML: "rateLimit",
},
// Retention settings
{
Name: "Audit Logs Retention",
@@ -3471,6 +3491,8 @@ type AIBridgeConfig struct {
Bedrock AIBridgeBedrockConfig `json:"bedrock" typescript:",notnull"`
InjectCoderMCPTools serpent.Bool `json:"inject_coder_mcp_tools" typescript:",notnull"`
Retention serpent.Duration `json:"retention" typescript:",notnull"`
MaxConcurrency serpent.Int64 `json:"max_concurrency" typescript:",notnull"`
RateLimit serpent.Int64 `json:"rate_limit" typescript:",notnull"`
}
type AIBridgeOpenAIConfig struct {
+2
View File
@@ -176,10 +176,12 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \
},
"enabled": true,
"inject_coder_mcp_tools": true,
"max_concurrency": 0,
"openai": {
"base_url": "string",
"key": "string"
},
"rate_limit": 0,
"retention": 0
}
},
+10
View File
@@ -390,10 +390,12 @@
},
"enabled": true,
"inject_coder_mcp_tools": true,
"max_concurrency": 0,
"openai": {
"base_url": "string",
"key": "string"
},
"rate_limit": 0,
"retention": 0
}
```
@@ -406,7 +408,9 @@
| `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | |
| `enabled` | boolean | false | | |
| `inject_coder_mcp_tools` | boolean | false | | |
| `max_concurrency` | integer | false | | |
| `openai` | [codersdk.AIBridgeOpenAIConfig](#codersdkaibridgeopenaiconfig) | false | | |
| `rate_limit` | integer | false | | |
| `retention` | integer | false | | |
## codersdk.AIBridgeInterception
@@ -700,10 +704,12 @@
},
"enabled": true,
"inject_coder_mcp_tools": true,
"max_concurrency": 0,
"openai": {
"base_url": "string",
"key": "string"
},
"rate_limit": 0,
"retention": 0
}
}
@@ -2860,10 +2866,12 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o
},
"enabled": true,
"inject_coder_mcp_tools": true,
"max_concurrency": 0,
"openai": {
"base_url": "string",
"key": "string"
},
"rate_limit": 0,
"retention": 0
}
},
@@ -3383,10 +3391,12 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o
},
"enabled": true,
"inject_coder_mcp_tools": true,
"max_concurrency": 0,
"openai": {
"base_url": "string",
"key": "string"
},
"rate_limit": 0,
"retention": 0
}
},
+22
View File
@@ -1781,6 +1781,28 @@ Whether to inject Coder's MCP tools into intercepted AI Bridge requests (require
Length of time to retain data such as interceptions and all related records (token, prompt, tool use).
### --aibridge-max-concurrency
| | |
|-------------|----------------------------------------------|
| Type | <code>int</code> |
| Environment | <code>$CODER_AIBRIDGE_MAX_CONCURRENCY</code> |
| YAML | <code>aibridge.maxConcurrency</code> |
| Default | <code>0</code> |
Maximum number of concurrent AI Bridge requests per replica. Set to 0 to disable (unlimited).
### --aibridge-rate-limit
| | |
|-------------|-----------------------------------------|
| Type | <code>int</code> |
| Environment | <code>$CODER_AIBRIDGE_RATE_LIMIT</code> |
| YAML | <code>aibridge.rateLimit</code> |
| Default | <code>0</code> |
Maximum number of AI Bridge requests per second per replica. Set to 0 to disable (unlimited).
### --audit-logs-retention
| | |
+2 -1
View File
@@ -17,6 +17,7 @@ import (
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/aibridge"
agplaibridge "github.com/coder/coder/v2/coderd/aibridge"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/enterprise/aibridged"
mock "github.com/coder/coder/v2/enterprise/aibridged/aibridgedmock"
@@ -220,7 +221,7 @@ func TestExtractAuthToken(t *testing.T) {
for k, v := range tc.headers {
headers.Add(k, v)
}
key := aibridged.ExtractAuthToken(headers)
key := agplaibridge.ExtractAuthToken(headers)
require.Equal(t, tc.expectedKey, key)
})
}
+2 -18
View File
@@ -9,6 +9,7 @@ import (
"cdr.dev/slog"
"github.com/coder/aibridge"
agplaibridge "github.com/coder/coder/v2/coderd/aibridge"
"github.com/coder/coder/v2/enterprise/aibridged/proto"
)
@@ -35,7 +36,7 @@ func (s *Server) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
logger := s.logger.With(slog.F("path", r.URL.Path))
key := strings.TrimSpace(ExtractAuthToken(r.Header))
key := strings.TrimSpace(agplaibridge.ExtractAuthToken(r.Header))
if key == "" {
logger.Warn(ctx, "no auth key provided")
http.Error(rw, ErrNoAuthKey.Error(), http.StatusBadRequest)
@@ -79,20 +80,3 @@ func (s *Server) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
handler.ServeHTTP(rw, r)
}
// ExtractAuthToken extracts authorization token from HTTP request using multiple sources.
// These sources represent the different ways clients authenticate against AI providers.
// It checks the Authorization header (Bearer token) and X-Api-Key header.
// If neither are present, an empty string is returned.
func ExtractAuthToken(header http.Header) string {
if auth := strings.TrimSpace(header.Get("Authorization")); auth != "" {
fields := strings.Fields(auth)
if len(fields) == 2 && strings.EqualFold(fields[0], "Bearer") {
return fields[1]
}
}
if apiKey := strings.TrimSpace(header.Get("X-Api-Key")); apiKey != "" {
return apiKey
}
return ""
}
+8
View File
@@ -126,12 +126,20 @@ AI BRIDGE OPTIONS:
requests (requires the "oauth2" and "mcp-server-http" experiments to
be enabled).
--aibridge-max-concurrency int, $CODER_AIBRIDGE_MAX_CONCURRENCY (default: 0)
Maximum number of concurrent AI Bridge requests per replica. Set to 0
to disable (unlimited).
--aibridge-openai-base-url string, $CODER_AIBRIDGE_OPENAI_BASE_URL (default: https://api.openai.com/v1/)
The base URL of the OpenAI API.
--aibridge-openai-key string, $CODER_AIBRIDGE_OPENAI_KEY
The key to authenticate against the OpenAI API.
--aibridge-rate-limit int, $CODER_AIBRIDGE_RATE_LIMIT (default: 0)
Maximum number of AI Bridge requests per second per replica. Set to 0
to disable (unlimited).
CLIENT OPTIONS:
These options change the behavior of how clients interact with the Coder.
Clients include the Coder CLI, Coder Desktop, IDE extensions, and the web UI.
+33 -18
View File
@@ -5,6 +5,7 @@ import (
"fmt"
"net/http"
"strings"
"time"
"github.com/go-chi/chi/v5"
"github.com/google/uuid"
@@ -23,10 +24,19 @@ import (
const (
maxListInterceptionsLimit = 1000
defaultListInterceptionsLimit = 100
// aiBridgeRateLimitWindow is the fixed duration for rate limiting AI Bridge
// requests. This is hardcoded to keep configuration simple.
aiBridgeRateLimitWindow = time.Second
)
// aibridgeHandler handles all aibridged-related endpoints.
func aibridgeHandler(api *API, middlewares ...func(http.Handler) http.Handler) func(r chi.Router) {
// Build the overload protection middleware chain for the aibridged handler.
// These limits are applied per-replica.
bridgeCfg := api.DeploymentValues.AI.BridgeConfig
concurrencyLimiter := httpmw.ConcurrencyLimit(bridgeCfg.MaxConcurrency.Value(), "AI Bridge")
rateLimiter := httpmw.RateLimitByAuthToken(int(bridgeCfg.RateLimit.Value()), aiBridgeRateLimitWindow)
return func(r chi.Router) {
r.Use(api.RequireFeatureMW(codersdk.FeatureAIBridge))
r.Group(func(r chi.Router) {
@@ -34,25 +44,30 @@ func aibridgeHandler(api *API, middlewares ...func(http.Handler) http.Handler) f
r.Get("/interceptions", api.aiBridgeListInterceptions)
})
// This is a bit funky but since aibridge only exposes a HTTP
// handler, this is how it has to be.
r.HandleFunc("/*", func(rw http.ResponseWriter, r *http.Request) {
if api.aibridgedHandler == nil {
httpapi.Write(r.Context(), rw, http.StatusNotFound, codersdk.Response{
Message: "aibridged handler not mounted",
})
return
}
// Strip either the experimental or stable prefix.
// TODO: experimental route is deprecated and must be removed with Beta.
prefixes := []string{"/api/experimental/aibridge", "/api/v2/aibridge"}
for _, prefix := range prefixes {
if strings.Contains(r.URL.String(), prefix) {
http.StripPrefix(prefix, api.aibridgedHandler).ServeHTTP(rw, r)
break
// Apply overload protection middleware to the aibridged handler.
// Concurrency limit is checked first for faster rejection under load.
r.Group(func(r chi.Router) {
r.Use(concurrencyLimiter, rateLimiter)
// This is a bit funky but since aibridge only exposes a HTTP
// handler, this is how it has to be.
r.HandleFunc("/*", func(rw http.ResponseWriter, r *http.Request) {
if api.aibridgedHandler == nil {
httpapi.Write(r.Context(), rw, http.StatusNotFound, codersdk.Response{
Message: "aibridged handler not mounted",
})
return
}
}
// Strip either the experimental or stable prefix.
// TODO: experimental route is deprecated and must be removed with Beta.
prefixes := []string{"/api/experimental/aibridge", "/api/v2/aibridge"}
for _, prefix := range prefixes {
if strings.Contains(r.URL.String(), prefix) {
http.StripPrefix(prefix, api.aibridgedHandler).ServeHTTP(rw, r)
break
}
}
})
})
}
}
+132
View File
@@ -703,3 +703,135 @@ func TestAIBridgeRouting(t *testing.T) {
})
}
}
func TestAIBridgeRateLimiting(t *testing.T) {
t.Parallel()
dv := coderdtest.DeploymentValues(t)
// Set a low rate limit for testing.
dv.AI.BridgeConfig.RateLimit = 2
client, closer, api, _ := coderdenttest.NewWithAPI(t, &coderdenttest.Options{
Options: &coderdtest.Options{
DeploymentValues: dv,
},
LicenseOptions: &coderdenttest.LicenseOptions{
Features: license.Features{
codersdk.FeatureAIBridge: 1,
},
},
})
t.Cleanup(func() {
_ = closer.Close()
})
// Register a simple test handler.
testHandler := http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
rw.WriteHeader(http.StatusOK)
})
api.RegisterInMemoryAIBridgedHTTPHandler(testHandler)
ctx := testutil.Context(t, testutil.WaitLong)
httpClient := &http.Client{}
url := client.URL.String() + "/api/v2/aibridge/test"
// Make requests up to the limit - should succeed.
for range 2 {
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, nil)
require.NoError(t, err)
req.Header.Set(codersdk.SessionTokenHeader, client.SessionToken())
resp, err := httpClient.Do(req)
require.NoError(t, err)
_ = resp.Body.Close()
require.Equal(t, http.StatusOK, resp.StatusCode)
}
// Next request should be rate limited.
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, nil)
require.NoError(t, err)
req.Header.Set(codersdk.SessionTokenHeader, client.SessionToken())
resp, err := httpClient.Do(req)
require.NoError(t, err)
defer resp.Body.Close()
require.Equal(t, http.StatusTooManyRequests, resp.StatusCode)
require.NotEmpty(t, resp.Header.Get("Retry-After"))
}
func TestAIBridgeConcurrencyLimiting(t *testing.T) {
t.Parallel()
dv := coderdtest.DeploymentValues(t)
// Set a low concurrency limit for testing.
dv.AI.BridgeConfig.MaxConcurrency = 1
client, closer, api, _ := coderdenttest.NewWithAPI(t, &coderdenttest.Options{
Options: &coderdtest.Options{
DeploymentValues: dv,
},
LicenseOptions: &coderdenttest.LicenseOptions{
Features: license.Features{
codersdk.FeatureAIBridge: 1,
},
},
})
t.Cleanup(func() {
_ = closer.Close()
})
// Register a handler that blocks until signaled.
started := make(chan struct{})
unblock := make(chan struct{})
testHandler := http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
started <- struct{}{}
<-unblock
rw.WriteHeader(http.StatusOK)
})
api.RegisterInMemoryAIBridgedHTTPHandler(testHandler)
ctx := testutil.Context(t, testutil.WaitLong)
httpClient := &http.Client{}
url := client.URL.String() + "/api/v2/aibridge/test"
// Start a request that will block.
done := make(chan struct{})
go func() {
defer close(done)
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, nil)
if err != nil {
return
}
req.Header.Set(codersdk.SessionTokenHeader, client.SessionToken())
resp, err := httpClient.Do(req)
if err == nil {
_ = resp.Body.Close()
}
}()
// Wait for the first request to start processing.
select {
case <-started:
case <-ctx.Done():
t.Fatal("timed out waiting for first request to start")
}
// Second request should be rejected with 503.
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, nil)
require.NoError(t, err)
req.Header.Set(codersdk.SessionTokenHeader, client.SessionToken())
resp, err := httpClient.Do(req)
require.NoError(t, err)
defer resp.Body.Close()
require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode)
// Unblock the first request and wait for it to complete.
close(unblock)
select {
case <-done:
case <-ctx.Done():
t.Fatal("timed out waiting for first request to complete")
}
}
+2
View File
@@ -33,6 +33,8 @@ export interface AIBridgeConfig {
readonly bedrock: AIBridgeBedrockConfig;
readonly inject_coder_mcp_tools: boolean;
readonly retention: number;
readonly max_concurrency: number;
readonly rate_limit: number;
}
// From codersdk/aibridge.go