mirror of
https://github.com/coder/coder.git
synced 2026-06-03 21:18:24 +00:00
49006685b0
## Problem Rate limiting by user is broken (#20857). The rate limit middleware runs before API key extraction, so user ID is never in the request context. This causes: - Rate limiting falls back to IP address for all requests - `X-Coder-Bypass-Ratelimit` header for Owners is ignored (can't verify role without identity) ## Solution Adds `PrecheckAPIKey`, a **root-level middleware** that fully validates the API key on every request (expiry, OIDC refresh, DB updates, role lookup) and stores the result in context. Added **once** at the root router — not duplicated per route group. ### Architecture ``` Request → Root middleware stack: → ExtractRealIP, Logger, ... → PrecheckAPIKey(...) ← validates key, stores result, never rejects → HandleSubdomain(apiRateLimiter) ← workspace apps now also benefit → CORS, CSRF → /api/v2 or /api/experimental: → apiRateLimiter ← reads prechecked result from context → route handlers: → ExtractAPIKeyMW ← reuses prechecked data, adds route-specific logic → handler ``` ### Key design decisions | Decision | Rationale | |---|---| | **Full validation, not lightweight** | Spike's review: "the whole idea of a 'lightweight' extraction that skips security checks is fundamentally flawed." Only fully validated keys are used for rate limiting — expired/invalid keys fall back to IP. | | **Structured error results** | `ValidateAPIKeyError` has a `Hard` flag that maps to `write` vs `optionalWrite`. Hard errors (5xx, OAuth refresh failures) surface even on optional-auth routes. Soft errors (missing/expired token) are swallowed on optional routes. | | **Added once at the root** | Spike's review: "Why can't we add it once at the root?" Root placement means workspace app rate limiters also benefit. | | **Skip prechecked when `SessionTokenFunc != nil`** | `workspaceapps/db.go` uses a custom `SessionTokenFunc` that extracts from `issueReq.SessionToken`. The prechecked result may have validated a different token. Falls back to `ValidateAPIKey` with the custom func. | | **User status check stays in `ExtractAPIKey`** | Dormant activation is route-specific — `ValidateAPIKey` stores status but doesn't enforce it. | | **Audience validation stays in `ExtractAPIKey`** | Depends on `cfg.AccessURL` and request path, uses `optionalWrite(403)` which depends on route config. | ### Changes - **`coderd/httpmw/apikey.go`**: - New `ValidateAPIKey` function — extracted core validation logic, returns structured errors instead of writing HTTP responses - New `PrecheckAPIKey` middleware — calls `ValidateAPIKey`, stores result in `apiKeyPrecheckedContextKey`, never rejects - New types: `ValidateAPIKeyConfig`, `ValidateAPIKeyResult`, `ValidateAPIKeyError`, `APIKeyPrechecked` - Refactored `ExtractAPIKey` — consumes prechecked result from context (skipping redundant validation), falls back to `ValidateAPIKey` when no precheck available - Removed `ExtractAPIKeyForRateLimit` and `preExtractedAPIKey` - **`coderd/httpmw/ratelimit.go`**: Rate limiter checks `apiKeyPrecheckedContextKey` first, then `apiKeyContextKey` fallback (for unit tests / workspace apps), then IP - **`coderd/coderd.go`**: Added `PrecheckAPIKey` once at root `r.Use(...)` block, removed `ExtractAPIKeyForRateLimit` from `/api/v2` and `/api/experimental` - **`coderd/coderd_test.go`**: `TestRateLimitByUser` regression test with `BypassOwner` subtest Fixes #20857
165 lines
5.5 KiB
Go
165 lines
5.5 KiB
Go
package httpmw
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"strconv"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/go-chi/httprate"
|
|
"golang.org/x/xerrors"
|
|
|
|
"github.com/coder/coder/v2/coderd/aibridge"
|
|
"github.com/coder/coder/v2/coderd/database"
|
|
"github.com/coder/coder/v2/coderd/httpapi"
|
|
"github.com/coder/coder/v2/coderd/rbac"
|
|
"github.com/coder/coder/v2/codersdk"
|
|
"github.com/coder/coder/v2/cryptorand"
|
|
)
|
|
|
|
// RateLimit returns a handler that limits requests per-minute based
|
|
// on IP, endpoint, and user ID (if available).
|
|
func RateLimit(count int, window time.Duration) func(http.Handler) http.Handler {
|
|
// -1 is no rate limit
|
|
if count <= 0 {
|
|
return func(handler http.Handler) http.Handler {
|
|
return handler
|
|
}
|
|
}
|
|
|
|
return httprate.Limit(
|
|
count,
|
|
window,
|
|
httprate.WithKeyFuncs(func(r *http.Request) (string, error) {
|
|
// Identify the caller. We check two sources:
|
|
//
|
|
// 1. apiKeyPrecheckedContextKey — set by PrecheckAPIKey
|
|
// at the root of the router. Only fully validated
|
|
// keys are used.
|
|
// 2. apiKeyContextKey — set by ExtractAPIKeyMW if it
|
|
// has already run (e.g. unit tests, workspace-app
|
|
// routes that don't go through PrecheckAPIKey).
|
|
//
|
|
// If neither is present, fall back to IP.
|
|
var userID string
|
|
var subject *rbac.Subject
|
|
|
|
if pc, ok := r.Context().Value(apiKeyPrecheckedContextKey{}).(APIKeyPrechecked); ok && pc.Result != nil {
|
|
userID = pc.Result.Key.UserID.String()
|
|
subject = &pc.Result.Subject
|
|
} else if ak, ok := r.Context().Value(apiKeyContextKey{}).(database.APIKey); ok {
|
|
userID = ak.UserID.String()
|
|
if auth, ok := UserAuthorizationOptional(r.Context()); ok {
|
|
subject = &auth
|
|
}
|
|
} else {
|
|
return httprate.KeyByIP(r)
|
|
}
|
|
|
|
if ok, _ := strconv.ParseBool(r.Header.Get(codersdk.BypassRatelimitHeader)); !ok {
|
|
// No bypass attempt, just rate limit by user.
|
|
return userID, nil
|
|
}
|
|
|
|
// Allow Owner to bypass rate limiting for load tests
|
|
// and automation. We avoid using rbac.Authorizer since
|
|
// rego is CPU-intensive and undermines the
|
|
// DoS-prevention goal of the rate limiter.
|
|
if subject == nil {
|
|
// Can't verify roles — rate limit normally.
|
|
return userID, nil
|
|
}
|
|
for _, role := range subject.SafeRoleNames() {
|
|
if role == rbac.RoleOwner() {
|
|
// HACK: use a random key each time to
|
|
// de facto disable rate limiting. The
|
|
// httprate package has no support for
|
|
// selectively changing the limit for
|
|
// particular keys.
|
|
return cryptorand.String(16)
|
|
}
|
|
}
|
|
|
|
return userID, xerrors.Errorf(
|
|
"%q provided but user is not %v",
|
|
codersdk.BypassRatelimitHeader, rbac.RoleOwner(),
|
|
)
|
|
}, httprate.KeyByEndpoint),
|
|
httprate.WithLimitHandler(func(w http.ResponseWriter, r *http.Request) {
|
|
httpapi.Write(r.Context(), w, http.StatusTooManyRequests, codersdk.Response{
|
|
Message: fmt.Sprintf("You've been rate limited for sending more than %v requests in %v.", count, window),
|
|
})
|
|
}),
|
|
)
|
|
}
|
|
|
|
// RateLimitByAuthToken returns a handler that limits requests based on the
|
|
// authentication token in the request.
|
|
//
|
|
// This differs from [RateLimit] in several ways:
|
|
// - It extracts the token directly from request headers (Authorization Bearer
|
|
// or X-Api-Key) rather than from the request context, making it suitable for
|
|
// endpoints that handle authentication internally (like AI Bridge) rather than
|
|
// via [ExtractAPIKeyMW] middleware.
|
|
// - It does not support the bypass header for Owners.
|
|
// - It does not key by endpoint, so the limit applies across all endpoints using
|
|
// this middleware.
|
|
// - It includes a Retry-After header in 429 responses for backpressure signaling.
|
|
//
|
|
// If no token is found in the headers, it falls back to rate limiting by IP address.
|
|
func RateLimitByAuthToken(count int, window time.Duration) func(http.Handler) http.Handler {
|
|
if count <= 0 {
|
|
return func(handler http.Handler) http.Handler {
|
|
return handler
|
|
}
|
|
}
|
|
|
|
return httprate.Limit(
|
|
count,
|
|
window,
|
|
httprate.WithKeyFuncs(func(r *http.Request) (string, error) {
|
|
// Try to extract auth token for per-user rate limiting using
|
|
// AI provider authentication headers (Authorization Bearer or X-Api-Key).
|
|
if token := aibridge.ExtractAuthToken(r.Header); token != "" {
|
|
return token, nil
|
|
}
|
|
// Fall back to IP-based rate limiting if no token present.
|
|
return httprate.KeyByIP(r)
|
|
}),
|
|
httprate.WithLimitHandler(func(w http.ResponseWriter, r *http.Request) {
|
|
// Add Retry-After header for backpressure signaling.
|
|
w.Header().Set("Retry-After", fmt.Sprintf("%d", int(window.Seconds())))
|
|
httpapi.Write(r.Context(), w, http.StatusTooManyRequests, codersdk.Response{
|
|
Message: "You've been rate limited. Please try again later.",
|
|
})
|
|
}),
|
|
)
|
|
}
|
|
|
|
// ConcurrencyLimit returns a handler that limits the number of concurrent
|
|
// requests. When the limit is exceeded, it returns HTTP 503 Service Unavailable.
|
|
func ConcurrencyLimit(maxConcurrent int64, resourceName string) func(http.Handler) http.Handler {
|
|
if maxConcurrent <= 0 {
|
|
return func(handler http.Handler) http.Handler {
|
|
return handler
|
|
}
|
|
}
|
|
|
|
var current atomic.Int64
|
|
return func(next http.Handler) http.Handler {
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
c := current.Add(1)
|
|
defer current.Add(-1)
|
|
|
|
if c > maxConcurrent {
|
|
httpapi.Write(r.Context(), w, http.StatusServiceUnavailable, codersdk.Response{
|
|
Message: fmt.Sprintf("%s is currently at capacity. Please try again later.", resourceName),
|
|
})
|
|
return
|
|
}
|
|
next.ServeHTTP(w, r)
|
|
})
|
|
}
|
|
}
|