mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
feat: remove 429 from aibridge circuit breaker failure conditions (#24701)
## Description Removes 429 (Too Many Requests) from the circuit breaker failure conditions. Rate limiting is now handled by automatic key failover instead of tripping the circuit breaker. ## Changes `DefaultIsFailure` no longer treats 429 as a circuit breaker failure. The circuit breaker now only trips on server overload responses (503, 529). Tests and integration tests updated to use 503 instead of 429 for tripping circuits. Description strings in deployment config updated to reflect the change. Closes https://github.com/coder/internal/issues/1445 > [!NOTE] > Initially generated by Coder Agents, modified and reviewed by @ssncferreira
This commit is contained in:
@@ -20,13 +20,15 @@ import (
|
||||
// and the request was rejected without calling the handler.
|
||||
var ErrCircuitOpen = xerrors.New("circuit breaker is open")
|
||||
|
||||
// DefaultIsFailure returns true for standard HTTP status codes that typically
|
||||
// indicate upstream overload.
|
||||
// DefaultIsFailure returns true for standard HTTP status codes that
|
||||
// typically indicate upstream overload.
|
||||
//
|
||||
// Note: 429 (Too Many Requests) is intentionally excluded. Rate
|
||||
// limits are key-specific and handled by automatic key failover.
|
||||
func DefaultIsFailure(statusCode int) bool {
|
||||
switch statusCode {
|
||||
case http.StatusTooManyRequests, // 429
|
||||
http.StatusServiceUnavailable, // 503
|
||||
http.StatusGatewayTimeout: // 504
|
||||
case http.StatusServiceUnavailable, // 503
|
||||
http.StatusGatewayTimeout: // 504
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
|
||||
@@ -32,11 +32,11 @@ func TestExecute_PerModelIsolation(t *testing.T) {
|
||||
sonnetModel := "claude-sonnet-4-20250514"
|
||||
haikuModel := "claude-3-5-haiku-20241022"
|
||||
|
||||
// Trip circuit on sonnet model (returns 429)
|
||||
// Trip circuit on sonnet model (returns 503)
|
||||
w := httptest.NewRecorder()
|
||||
err := cbs.Execute(endpoint, sonnetModel, w, func(rw http.ResponseWriter) error {
|
||||
sonnetCalls.Add(1)
|
||||
rw.WriteHeader(http.StatusTooManyRequests)
|
||||
rw.WriteHeader(http.StatusServiceUnavailable)
|
||||
return nil
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
@@ -79,11 +79,11 @@ func TestExecute_PerEndpointIsolation(t *testing.T) {
|
||||
|
||||
model := "test-model"
|
||||
|
||||
// Trip circuit on /v1/messages endpoint (returns 429)
|
||||
// Trip circuit on /v1/messages endpoint (returns 503)
|
||||
w := httptest.NewRecorder()
|
||||
err := cbs.Execute("/v1/messages", model, w, func(rw http.ResponseWriter) error {
|
||||
messagesCalls.Add(1)
|
||||
rw.WriteHeader(http.StatusTooManyRequests)
|
||||
rw.WriteHeader(http.StatusServiceUnavailable)
|
||||
return nil
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
@@ -179,7 +179,7 @@ func TestExecute_OnStateChange(t *testing.T) {
|
||||
// Trip circuit
|
||||
w := httptest.NewRecorder()
|
||||
err := cbs.Execute(endpoint, model, w, func(rw http.ResponseWriter) error {
|
||||
rw.WriteHeader(http.StatusTooManyRequests)
|
||||
rw.WriteHeader(http.StatusServiceUnavailable)
|
||||
return nil
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
@@ -202,7 +202,7 @@ func TestDefaultIsFailure(t *testing.T) {
|
||||
{http.StatusOK, false},
|
||||
{http.StatusBadRequest, false},
|
||||
{http.StatusUnauthorized, false},
|
||||
{http.StatusTooManyRequests, true}, // 429
|
||||
{http.StatusTooManyRequests, false}, // 429: handled by key failover, not circuit breaker
|
||||
{http.StatusInternalServerError, false},
|
||||
{http.StatusBadGateway, false},
|
||||
{http.StatusServiceUnavailable, true}, // 503
|
||||
|
||||
@@ -23,8 +23,8 @@ import (
|
||||
|
||||
// Common response bodies for circuit breaker tests.
|
||||
const (
|
||||
anthropicRateLimitError = `{"type":"error","error":{"type":"rate_limit_error","message":"rate limited"}}`
|
||||
openAIRateLimitError = `{"error":{"type":"rate_limit_error","message":"rate limited","code":"rate_limit_exceeded"}}`
|
||||
anthropicOverloadedError = `{"type":"error","error":{"type":"api_error","message":"Internal server error"}}`
|
||||
openAIOverloadedError = `{"error":{"message":"Service Unavailable.","type":"cf_service_unavailable","code":503}}`
|
||||
)
|
||||
|
||||
func anthropicSuccessResponse(model string) string {
|
||||
@@ -59,7 +59,7 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) {
|
||||
expectProvider: config.ProviderAnthropic,
|
||||
expectEndpoint: "/v1/messages",
|
||||
expectModel: "claude-sonnet-4-20250514",
|
||||
errorBody: anthropicRateLimitError,
|
||||
errorBody: anthropicOverloadedError,
|
||||
successBody: anthropicSuccessResponse("claude-sonnet-4-20250514"),
|
||||
requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`,
|
||||
headers: http.Header{
|
||||
@@ -80,7 +80,7 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) {
|
||||
expectProvider: config.ProviderOpenAI,
|
||||
expectEndpoint: "/v1/chat/completions",
|
||||
expectModel: "gpt-4o",
|
||||
errorBody: openAIRateLimitError,
|
||||
errorBody: openAIOverloadedError,
|
||||
successBody: openAISuccessResponse("gpt-4o"),
|
||||
requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`,
|
||||
headers: http.Header{"Authorization": {"Bearer test-key"}},
|
||||
@@ -103,14 +103,14 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) {
|
||||
var shouldFail atomic.Bool
|
||||
shouldFail.Store(true)
|
||||
|
||||
// Mock upstream that returns 429 or 200 based on shouldFail flag.
|
||||
// Mock upstream that returns 503 or 200 based on shouldFail flag.
|
||||
// x-should-retry: false is required to disable SDK automatic retries (default MaxRetries=2).
|
||||
mockUpstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
upstreamCalls.Add(1)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Header().Set("x-should-retry", "false")
|
||||
if shouldFail.Load() {
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
_, _ = w.Write([]byte(tc.errorBody))
|
||||
} else {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
@@ -146,10 +146,10 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) {
|
||||
}
|
||||
|
||||
// Phase 1: Trip the circuit breaker
|
||||
// First FailureThreshold requests hit upstream, get 429
|
||||
// First FailureThreshold requests hit upstream, get 503
|
||||
for i := uint32(0); i < cbConfig.FailureThreshold; i++ {
|
||||
status := doRequest()
|
||||
assert.Equal(t, http.StatusTooManyRequests, status)
|
||||
assert.Equal(t, http.StatusServiceUnavailable, status)
|
||||
}
|
||||
//nolint:gosec // G115: test constant, no overflow risk
|
||||
assert.Equal(t, int32(cbConfig.FailureThreshold), upstreamCalls.Load())
|
||||
@@ -227,7 +227,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) {
|
||||
expectProvider: config.ProviderAnthropic,
|
||||
expectEndpoint: "/v1/messages",
|
||||
expectModel: "claude-sonnet-4-20250514",
|
||||
errorBody: anthropicRateLimitError,
|
||||
errorBody: anthropicOverloadedError,
|
||||
requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`,
|
||||
headers: http.Header{
|
||||
"x-api-key": {"test"},
|
||||
@@ -247,7 +247,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) {
|
||||
expectProvider: config.ProviderOpenAI,
|
||||
expectEndpoint: "/v1/chat/completions",
|
||||
expectModel: "gpt-4o",
|
||||
errorBody: openAIRateLimitError,
|
||||
errorBody: openAIOverloadedError,
|
||||
requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`,
|
||||
headers: http.Header{"Authorization": {"Bearer test-key"}},
|
||||
path: pathOpenAIChatCompletions,
|
||||
@@ -267,12 +267,12 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) {
|
||||
|
||||
var upstreamCalls atomic.Int32
|
||||
|
||||
// Mock upstream that always returns 429.
|
||||
// Mock upstream that always returns 503.
|
||||
mockUpstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
upstreamCalls.Add(1)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Header().Set("x-should-retry", "false")
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
_, _ = w.Write([]byte(tc.errorBody))
|
||||
}))
|
||||
defer mockUpstream.Close()
|
||||
@@ -305,7 +305,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) {
|
||||
// Phase 1: Trip the circuit
|
||||
for i := uint32(0); i < cbConfig.FailureThreshold; i++ {
|
||||
status := doRequest()
|
||||
assert.Equal(t, http.StatusTooManyRequests, status)
|
||||
assert.Equal(t, http.StatusServiceUnavailable, status)
|
||||
}
|
||||
|
||||
// Verify circuit is open
|
||||
@@ -321,7 +321,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) {
|
||||
// Phase 3: Request in half-open state fails, circuit should re-open
|
||||
upstreamCallsBefore := upstreamCalls.Load()
|
||||
status = doRequest()
|
||||
assert.Equal(t, http.StatusTooManyRequests, status, "Request should fail in half-open state")
|
||||
assert.Equal(t, http.StatusServiceUnavailable, status, "Request should fail in half-open state")
|
||||
assert.Equal(t, upstreamCallsBefore+1, upstreamCalls.Load(), "Request should reach upstream in half-open state")
|
||||
|
||||
// Circuit should be open again - next request should be rejected immediately
|
||||
@@ -363,7 +363,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) {
|
||||
expectProvider: config.ProviderAnthropic,
|
||||
expectEndpoint: "/v1/messages",
|
||||
expectModel: "claude-sonnet-4-20250514",
|
||||
errorBody: anthropicRateLimitError,
|
||||
errorBody: anthropicOverloadedError,
|
||||
successBody: anthropicSuccessResponse("claude-sonnet-4-20250514"),
|
||||
requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`,
|
||||
headers: http.Header{
|
||||
@@ -384,7 +384,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) {
|
||||
expectProvider: config.ProviderOpenAI,
|
||||
expectEndpoint: "/v1/chat/completions",
|
||||
expectModel: "gpt-4o",
|
||||
errorBody: openAIRateLimitError,
|
||||
errorBody: openAIOverloadedError,
|
||||
successBody: openAISuccessResponse("gpt-4o"),
|
||||
requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`,
|
||||
headers: http.Header{"Authorization": {"Bearer test-key"}},
|
||||
@@ -413,7 +413,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Header().Set("x-should-retry", "false")
|
||||
if shouldFail.Load() {
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
_, _ = w.Write([]byte(tc.errorBody))
|
||||
} else {
|
||||
// Slow response to ensure requests overlap
|
||||
@@ -453,7 +453,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) {
|
||||
// Phase 1: Trip the circuit
|
||||
for i := uint32(0); i < cbConfig.FailureThreshold; i++ {
|
||||
status := doRequest()
|
||||
assert.Equal(t, http.StatusTooManyRequests, status)
|
||||
assert.Equal(t, http.StatusServiceUnavailable, status)
|
||||
}
|
||||
|
||||
// Verify circuit is open
|
||||
@@ -529,8 +529,8 @@ func TestCircuitBreaker_PerModelIsolation(t *testing.T) {
|
||||
if strings.Contains(string(body), "claude-sonnet-4-20250514") {
|
||||
sonnetCalls.Add(1)
|
||||
if sonnetShouldFail.Load() {
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
_, _ = w.Write([]byte(anthropicRateLimitError))
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
_, _ = w.Write([]byte(anthropicOverloadedError))
|
||||
} else {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(anthropicSuccessResponse("claude-sonnet-4-20250514")))
|
||||
@@ -578,7 +578,7 @@ func TestCircuitBreaker_PerModelIsolation(t *testing.T) {
|
||||
// Phase 1: Trip the circuit for sonnet model
|
||||
for i := uint32(0); i < cbConfig.FailureThreshold; i++ {
|
||||
status := doRequest("claude-sonnet-4-20250514")
|
||||
assert.Equal(t, http.StatusTooManyRequests, status)
|
||||
assert.Equal(t, http.StatusServiceUnavailable, status)
|
||||
}
|
||||
//nolint:gosec // G115: test constant, no overflow risk
|
||||
assert.Equal(t, int32(cbConfig.FailureThreshold), sonnetCalls.Load())
|
||||
|
||||
@@ -360,7 +360,7 @@ func Test_anthropicIsFailure(t *testing.T) {
|
||||
{http.StatusOK, false},
|
||||
{http.StatusBadRequest, false},
|
||||
{http.StatusUnauthorized, false},
|
||||
{http.StatusTooManyRequests, true}, // 429
|
||||
{http.StatusTooManyRequests, false}, // 429: handled by key failover, not circuit breaker
|
||||
{http.StatusInternalServerError, false},
|
||||
{http.StatusBadGateway, false},
|
||||
{http.StatusServiceUnavailable, true}, // 503
|
||||
|
||||
+1
-1
@@ -138,7 +138,7 @@ AI BRIDGE OPTIONS:
|
||||
|
||||
--aibridge-circuit-breaker-enabled bool, $CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED (default: false)
|
||||
Enable the circuit breaker to protect against cascading failures from
|
||||
upstream AI provider rate limits (429, 503, 529 overloaded).
|
||||
upstream AI provider overload (503, 529).
|
||||
|
||||
--aibridge-retention duration, $CODER_AIBRIDGE_RETENTION (default: 60d)
|
||||
Length of time to retain data such as interceptions and all related
|
||||
|
||||
+1
-1
@@ -821,7 +821,7 @@ aibridge:
|
||||
# (default: true, type: bool)
|
||||
allow_byok: true
|
||||
# Enable the circuit breaker to protect against cascading failures from upstream
|
||||
# AI provider rate limits (429, 503, 529 overloaded).
|
||||
# AI provider overload (503, 529).
|
||||
# (default: false, type: bool)
|
||||
circuit_breaker_enabled: false
|
||||
# Number of consecutive failures that triggers the circuit breaker to open.
|
||||
|
||||
Generated
+1
-1
@@ -13138,7 +13138,7 @@ const docTemplate = `{
|
||||
]
|
||||
},
|
||||
"circuit_breaker_enabled": {
|
||||
"description": "Circuit breaker protects against cascading failures from upstream AI\nprovider rate limits (429, 503, 529 overloaded).",
|
||||
"description": "Circuit breaker protects against cascading failures from upstream AI\nprovider overload (503, 529).",
|
||||
"type": "boolean"
|
||||
},
|
||||
"circuit_breaker_failure_threshold": {
|
||||
|
||||
Generated
+1
-1
@@ -11686,7 +11686,7 @@
|
||||
]
|
||||
},
|
||||
"circuit_breaker_enabled": {
|
||||
"description": "Circuit breaker protects against cascading failures from upstream AI\nprovider rate limits (429, 503, 529 overloaded).",
|
||||
"description": "Circuit breaker protects against cascading failures from upstream AI\nprovider overload (503, 529).",
|
||||
"type": "boolean"
|
||||
},
|
||||
"circuit_breaker_failure_threshold": {
|
||||
|
||||
@@ -3823,7 +3823,7 @@ Write out the current server config as YAML to stdout.`,
|
||||
},
|
||||
{
|
||||
Name: "AI Bridge Circuit Breaker Enabled",
|
||||
Description: "Enable the circuit breaker to protect against cascading failures from upstream AI provider rate limits (429, 503, 529 overloaded).",
|
||||
Description: "Enable the circuit breaker to protect against cascading failures from upstream AI provider overload (503, 529).",
|
||||
Flag: "aibridge-circuit-breaker-enabled",
|
||||
Env: "CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED",
|
||||
Value: &c.AI.BridgeConfig.CircuitBreakerEnabled,
|
||||
@@ -4074,7 +4074,7 @@ type AIBridgeConfig struct {
|
||||
SendActorHeaders serpent.Bool `json:"send_actor_headers" typescript:",notnull"`
|
||||
AllowBYOK serpent.Bool `json:"allow_byok" typescript:",notnull"`
|
||||
// Circuit breaker protects against cascading failures from upstream AI
|
||||
// provider rate limits (429, 503, 529 overloaded).
|
||||
// provider overload (503, 529).
|
||||
CircuitBreakerEnabled serpent.Bool `json:"circuit_breaker_enabled" typescript:",notnull"`
|
||||
CircuitBreakerFailureThreshold serpent.Int64 `json:"circuit_breaker_failure_threshold" typescript:",notnull"`
|
||||
CircuitBreakerInterval serpent.Duration `json:"circuit_breaker_interval" typescript:",notnull"`
|
||||
|
||||
Generated
+1
-1
@@ -487,7 +487,7 @@
|
||||
| `allow_byok` | boolean | false | | |
|
||||
| `anthropic` | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false | | Deprecated: Use Providers with indexed CODER_AIBRIDGE_PROVIDER_<N>_* env vars instead. |
|
||||
| `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | Deprecated: Use Providers with indexed CODER_AIBRIDGE_PROVIDER_<N>_* env vars instead. |
|
||||
| `circuit_breaker_enabled` | boolean | false | | Circuit breaker protects against cascading failures from upstream AI provider rate limits (429, 503, 529 overloaded). |
|
||||
| `circuit_breaker_enabled` | boolean | false | | Circuit breaker protects against cascading failures from upstream AI provider overload (503, 529). |
|
||||
| `circuit_breaker_failure_threshold` | integer | false | | |
|
||||
| `circuit_breaker_interval` | integer | false | | |
|
||||
| `circuit_breaker_max_requests` | integer | false | | |
|
||||
|
||||
Generated
+1
-1
@@ -1899,7 +1899,7 @@ Allow users to provide their own LLM API keys or subscriptions. When disabled, o
|
||||
| YAML | <code>aibridge.circuit_breaker_enabled</code> |
|
||||
| Default | <code>false</code> |
|
||||
|
||||
Enable the circuit breaker to protect against cascading failures from upstream AI provider rate limits (429, 503, 529 overloaded).
|
||||
Enable the circuit breaker to protect against cascading failures from upstream AI provider overload (503, 529).
|
||||
|
||||
### --aibridge-proxy-enabled
|
||||
|
||||
|
||||
@@ -438,13 +438,13 @@ func TestIntegrationCircuitBreaker(t *testing.T) {
|
||||
registry := prometheus.NewRegistry()
|
||||
metrics := aibridge.NewMetrics(registry)
|
||||
|
||||
// Set up mock OpenAI server that always returns 429 Too Many Requests.
|
||||
// Set up mock OpenAI server that always returns 503 Service Unavailable.
|
||||
mockOpenAI := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
// Disable SDK retries.
|
||||
w.Header().Set("x-should-retry", "false")
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
_, _ = w.Write([]byte(`{"error":{"type":"rate_limit_error","message":"rate limited","code":"rate_limit_exceeded"}}`))
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
_, _ = w.Write([]byte(`{"error":{"message":"Service Unavailable.","type":"cf_service_unavailable","code":503}}`))
|
||||
}))
|
||||
t.Cleanup(mockOpenAI.Close)
|
||||
|
||||
|
||||
+1
-1
@@ -139,7 +139,7 @@ AI BRIDGE OPTIONS:
|
||||
|
||||
--aibridge-circuit-breaker-enabled bool, $CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED (default: false)
|
||||
Enable the circuit breaker to protect against cascading failures from
|
||||
upstream AI provider rate limits (429, 503, 529 overloaded).
|
||||
upstream AI provider overload (503, 529).
|
||||
|
||||
--aibridge-retention duration, $CODER_AIBRIDGE_RETENTION (default: 60d)
|
||||
Length of time to retain data such as interceptions and all related
|
||||
|
||||
Generated
+1
-1
@@ -70,7 +70,7 @@ export interface AIBridgeConfig {
|
||||
readonly allow_byok: boolean;
|
||||
/**
|
||||
* Circuit breaker protects against cascading failures from upstream AI
|
||||
* provider rate limits (429, 503, 529 overloaded).
|
||||
* provider overload (503, 529).
|
||||
*/
|
||||
readonly circuit_breaker_enabled: boolean;
|
||||
readonly circuit_breaker_failure_threshold: number;
|
||||
|
||||
Reference in New Issue
Block a user