diff --git a/aibridge/circuitbreaker/circuitbreaker.go b/aibridge/circuitbreaker/circuitbreaker.go index 0f0880b192..61a2f05627 100644 --- a/aibridge/circuitbreaker/circuitbreaker.go +++ b/aibridge/circuitbreaker/circuitbreaker.go @@ -20,13 +20,15 @@ import ( // and the request was rejected without calling the handler. var ErrCircuitOpen = xerrors.New("circuit breaker is open") -// DefaultIsFailure returns true for standard HTTP status codes that typically -// indicate upstream overload. +// DefaultIsFailure returns true for standard HTTP status codes that +// typically indicate upstream overload. +// +// Note: 429 (Too Many Requests) is intentionally excluded. Rate +// limits are key-specific and handled by automatic key failover. func DefaultIsFailure(statusCode int) bool { switch statusCode { - case http.StatusTooManyRequests, // 429 - http.StatusServiceUnavailable, // 503 - http.StatusGatewayTimeout: // 504 + case http.StatusServiceUnavailable, // 503 + http.StatusGatewayTimeout: // 504 return true default: return false diff --git a/aibridge/circuitbreaker/circuitbreaker_test.go b/aibridge/circuitbreaker/circuitbreaker_test.go index b80bfa2deb..57081e680a 100644 --- a/aibridge/circuitbreaker/circuitbreaker_test.go +++ b/aibridge/circuitbreaker/circuitbreaker_test.go @@ -32,11 +32,11 @@ func TestExecute_PerModelIsolation(t *testing.T) { sonnetModel := "claude-sonnet-4-20250514" haikuModel := "claude-3-5-haiku-20241022" - // Trip circuit on sonnet model (returns 429) + // Trip circuit on sonnet model (returns 503) w := httptest.NewRecorder() err := cbs.Execute(endpoint, sonnetModel, w, func(rw http.ResponseWriter) error { sonnetCalls.Add(1) - rw.WriteHeader(http.StatusTooManyRequests) + rw.WriteHeader(http.StatusServiceUnavailable) return nil }) assert.NoError(t, err) @@ -79,11 +79,11 @@ func TestExecute_PerEndpointIsolation(t *testing.T) { model := "test-model" - // Trip circuit on /v1/messages endpoint (returns 429) + // Trip circuit on /v1/messages endpoint (returns 503) w := httptest.NewRecorder() err := cbs.Execute("/v1/messages", model, w, func(rw http.ResponseWriter) error { messagesCalls.Add(1) - rw.WriteHeader(http.StatusTooManyRequests) + rw.WriteHeader(http.StatusServiceUnavailable) return nil }) assert.NoError(t, err) @@ -179,7 +179,7 @@ func TestExecute_OnStateChange(t *testing.T) { // Trip circuit w := httptest.NewRecorder() err := cbs.Execute(endpoint, model, w, func(rw http.ResponseWriter) error { - rw.WriteHeader(http.StatusTooManyRequests) + rw.WriteHeader(http.StatusServiceUnavailable) return nil }) assert.NoError(t, err) @@ -202,7 +202,7 @@ func TestDefaultIsFailure(t *testing.T) { {http.StatusOK, false}, {http.StatusBadRequest, false}, {http.StatusUnauthorized, false}, - {http.StatusTooManyRequests, true}, // 429 + {http.StatusTooManyRequests, false}, // 429: handled by key failover, not circuit breaker {http.StatusInternalServerError, false}, {http.StatusBadGateway, false}, {http.StatusServiceUnavailable, true}, // 503 diff --git a/aibridge/internal/integrationtest/circuit_breaker_test.go b/aibridge/internal/integrationtest/circuit_breaker_test.go index 3ace843275..c2619ec920 100644 --- a/aibridge/internal/integrationtest/circuit_breaker_test.go +++ b/aibridge/internal/integrationtest/circuit_breaker_test.go @@ -23,8 +23,8 @@ import ( // Common response bodies for circuit breaker tests. const ( - anthropicRateLimitError = `{"type":"error","error":{"type":"rate_limit_error","message":"rate limited"}}` - openAIRateLimitError = `{"error":{"type":"rate_limit_error","message":"rate limited","code":"rate_limit_exceeded"}}` + anthropicOverloadedError = `{"type":"error","error":{"type":"api_error","message":"Internal server error"}}` + openAIOverloadedError = `{"error":{"message":"Service Unavailable.","type":"cf_service_unavailable","code":503}}` ) func anthropicSuccessResponse(model string) string { @@ -59,7 +59,7 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) { expectProvider: config.ProviderAnthropic, expectEndpoint: "/v1/messages", expectModel: "claude-sonnet-4-20250514", - errorBody: anthropicRateLimitError, + errorBody: anthropicOverloadedError, successBody: anthropicSuccessResponse("claude-sonnet-4-20250514"), requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`, headers: http.Header{ @@ -80,7 +80,7 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) { expectProvider: config.ProviderOpenAI, expectEndpoint: "/v1/chat/completions", expectModel: "gpt-4o", - errorBody: openAIRateLimitError, + errorBody: openAIOverloadedError, successBody: openAISuccessResponse("gpt-4o"), requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`, headers: http.Header{"Authorization": {"Bearer test-key"}}, @@ -103,14 +103,14 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) { var shouldFail atomic.Bool shouldFail.Store(true) - // Mock upstream that returns 429 or 200 based on shouldFail flag. + // Mock upstream that returns 503 or 200 based on shouldFail flag. // x-should-retry: false is required to disable SDK automatic retries (default MaxRetries=2). mockUpstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { upstreamCalls.Add(1) w.Header().Set("Content-Type", "application/json") w.Header().Set("x-should-retry", "false") if shouldFail.Load() { - w.WriteHeader(http.StatusTooManyRequests) + w.WriteHeader(http.StatusServiceUnavailable) _, _ = w.Write([]byte(tc.errorBody)) } else { w.WriteHeader(http.StatusOK) @@ -146,10 +146,10 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) { } // Phase 1: Trip the circuit breaker - // First FailureThreshold requests hit upstream, get 429 + // First FailureThreshold requests hit upstream, get 503 for i := uint32(0); i < cbConfig.FailureThreshold; i++ { status := doRequest() - assert.Equal(t, http.StatusTooManyRequests, status) + assert.Equal(t, http.StatusServiceUnavailable, status) } //nolint:gosec // G115: test constant, no overflow risk assert.Equal(t, int32(cbConfig.FailureThreshold), upstreamCalls.Load()) @@ -227,7 +227,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) { expectProvider: config.ProviderAnthropic, expectEndpoint: "/v1/messages", expectModel: "claude-sonnet-4-20250514", - errorBody: anthropicRateLimitError, + errorBody: anthropicOverloadedError, requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`, headers: http.Header{ "x-api-key": {"test"}, @@ -247,7 +247,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) { expectProvider: config.ProviderOpenAI, expectEndpoint: "/v1/chat/completions", expectModel: "gpt-4o", - errorBody: openAIRateLimitError, + errorBody: openAIOverloadedError, requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`, headers: http.Header{"Authorization": {"Bearer test-key"}}, path: pathOpenAIChatCompletions, @@ -267,12 +267,12 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) { var upstreamCalls atomic.Int32 - // Mock upstream that always returns 429. + // Mock upstream that always returns 503. mockUpstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { upstreamCalls.Add(1) w.Header().Set("Content-Type", "application/json") w.Header().Set("x-should-retry", "false") - w.WriteHeader(http.StatusTooManyRequests) + w.WriteHeader(http.StatusServiceUnavailable) _, _ = w.Write([]byte(tc.errorBody)) })) defer mockUpstream.Close() @@ -305,7 +305,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) { // Phase 1: Trip the circuit for i := uint32(0); i < cbConfig.FailureThreshold; i++ { status := doRequest() - assert.Equal(t, http.StatusTooManyRequests, status) + assert.Equal(t, http.StatusServiceUnavailable, status) } // Verify circuit is open @@ -321,7 +321,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) { // Phase 3: Request in half-open state fails, circuit should re-open upstreamCallsBefore := upstreamCalls.Load() status = doRequest() - assert.Equal(t, http.StatusTooManyRequests, status, "Request should fail in half-open state") + assert.Equal(t, http.StatusServiceUnavailable, status, "Request should fail in half-open state") assert.Equal(t, upstreamCallsBefore+1, upstreamCalls.Load(), "Request should reach upstream in half-open state") // Circuit should be open again - next request should be rejected immediately @@ -363,7 +363,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) { expectProvider: config.ProviderAnthropic, expectEndpoint: "/v1/messages", expectModel: "claude-sonnet-4-20250514", - errorBody: anthropicRateLimitError, + errorBody: anthropicOverloadedError, successBody: anthropicSuccessResponse("claude-sonnet-4-20250514"), requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`, headers: http.Header{ @@ -384,7 +384,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) { expectProvider: config.ProviderOpenAI, expectEndpoint: "/v1/chat/completions", expectModel: "gpt-4o", - errorBody: openAIRateLimitError, + errorBody: openAIOverloadedError, successBody: openAISuccessResponse("gpt-4o"), requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`, headers: http.Header{"Authorization": {"Bearer test-key"}}, @@ -413,7 +413,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) { w.Header().Set("Content-Type", "application/json") w.Header().Set("x-should-retry", "false") if shouldFail.Load() { - w.WriteHeader(http.StatusTooManyRequests) + w.WriteHeader(http.StatusServiceUnavailable) _, _ = w.Write([]byte(tc.errorBody)) } else { // Slow response to ensure requests overlap @@ -453,7 +453,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) { // Phase 1: Trip the circuit for i := uint32(0); i < cbConfig.FailureThreshold; i++ { status := doRequest() - assert.Equal(t, http.StatusTooManyRequests, status) + assert.Equal(t, http.StatusServiceUnavailable, status) } // Verify circuit is open @@ -529,8 +529,8 @@ func TestCircuitBreaker_PerModelIsolation(t *testing.T) { if strings.Contains(string(body), "claude-sonnet-4-20250514") { sonnetCalls.Add(1) if sonnetShouldFail.Load() { - w.WriteHeader(http.StatusTooManyRequests) - _, _ = w.Write([]byte(anthropicRateLimitError)) + w.WriteHeader(http.StatusServiceUnavailable) + _, _ = w.Write([]byte(anthropicOverloadedError)) } else { w.WriteHeader(http.StatusOK) _, _ = w.Write([]byte(anthropicSuccessResponse("claude-sonnet-4-20250514"))) @@ -578,7 +578,7 @@ func TestCircuitBreaker_PerModelIsolation(t *testing.T) { // Phase 1: Trip the circuit for sonnet model for i := uint32(0); i < cbConfig.FailureThreshold; i++ { status := doRequest("claude-sonnet-4-20250514") - assert.Equal(t, http.StatusTooManyRequests, status) + assert.Equal(t, http.StatusServiceUnavailable, status) } //nolint:gosec // G115: test constant, no overflow risk assert.Equal(t, int32(cbConfig.FailureThreshold), sonnetCalls.Load()) diff --git a/aibridge/provider/anthropic_test.go b/aibridge/provider/anthropic_test.go index 75eef0e0d0..a4ea0c21e2 100644 --- a/aibridge/provider/anthropic_test.go +++ b/aibridge/provider/anthropic_test.go @@ -360,7 +360,7 @@ func Test_anthropicIsFailure(t *testing.T) { {http.StatusOK, false}, {http.StatusBadRequest, false}, {http.StatusUnauthorized, false}, - {http.StatusTooManyRequests, true}, // 429 + {http.StatusTooManyRequests, false}, // 429: handled by key failover, not circuit breaker {http.StatusInternalServerError, false}, {http.StatusBadGateway, false}, {http.StatusServiceUnavailable, true}, // 503 diff --git a/cli/testdata/coder_server_--help.golden b/cli/testdata/coder_server_--help.golden index 286dfc0b1b..2862a45c3b 100644 --- a/cli/testdata/coder_server_--help.golden +++ b/cli/testdata/coder_server_--help.golden @@ -138,7 +138,7 @@ AI BRIDGE OPTIONS: --aibridge-circuit-breaker-enabled bool, $CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED (default: false) Enable the circuit breaker to protect against cascading failures from - upstream AI provider rate limits (429, 503, 529 overloaded). + upstream AI provider overload (503, 529). --aibridge-retention duration, $CODER_AIBRIDGE_RETENTION (default: 60d) Length of time to retain data such as interceptions and all related diff --git a/cli/testdata/server-config.yaml.golden b/cli/testdata/server-config.yaml.golden index abc5faa877..22ad14e506 100644 --- a/cli/testdata/server-config.yaml.golden +++ b/cli/testdata/server-config.yaml.golden @@ -821,7 +821,7 @@ aibridge: # (default: true, type: bool) allow_byok: true # Enable the circuit breaker to protect against cascading failures from upstream - # AI provider rate limits (429, 503, 529 overloaded). + # AI provider overload (503, 529). # (default: false, type: bool) circuit_breaker_enabled: false # Number of consecutive failures that triggers the circuit breaker to open. diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index bec25890ff..0c9ef6f930 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -13138,7 +13138,7 @@ const docTemplate = `{ ] }, "circuit_breaker_enabled": { - "description": "Circuit breaker protects against cascading failures from upstream AI\nprovider rate limits (429, 503, 529 overloaded).", + "description": "Circuit breaker protects against cascading failures from upstream AI\nprovider overload (503, 529).", "type": "boolean" }, "circuit_breaker_failure_threshold": { diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 356d14b6df..956fe5c8ef 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -11686,7 +11686,7 @@ ] }, "circuit_breaker_enabled": { - "description": "Circuit breaker protects against cascading failures from upstream AI\nprovider rate limits (429, 503, 529 overloaded).", + "description": "Circuit breaker protects against cascading failures from upstream AI\nprovider overload (503, 529).", "type": "boolean" }, "circuit_breaker_failure_threshold": { diff --git a/codersdk/deployment.go b/codersdk/deployment.go index be2f738ceb..c7b17b4235 100644 --- a/codersdk/deployment.go +++ b/codersdk/deployment.go @@ -3823,7 +3823,7 @@ Write out the current server config as YAML to stdout.`, }, { Name: "AI Bridge Circuit Breaker Enabled", - Description: "Enable the circuit breaker to protect against cascading failures from upstream AI provider rate limits (429, 503, 529 overloaded).", + Description: "Enable the circuit breaker to protect against cascading failures from upstream AI provider overload (503, 529).", Flag: "aibridge-circuit-breaker-enabled", Env: "CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED", Value: &c.AI.BridgeConfig.CircuitBreakerEnabled, @@ -4074,7 +4074,7 @@ type AIBridgeConfig struct { SendActorHeaders serpent.Bool `json:"send_actor_headers" typescript:",notnull"` AllowBYOK serpent.Bool `json:"allow_byok" typescript:",notnull"` // Circuit breaker protects against cascading failures from upstream AI - // provider rate limits (429, 503, 529 overloaded). + // provider overload (503, 529). CircuitBreakerEnabled serpent.Bool `json:"circuit_breaker_enabled" typescript:",notnull"` CircuitBreakerFailureThreshold serpent.Int64 `json:"circuit_breaker_failure_threshold" typescript:",notnull"` CircuitBreakerInterval serpent.Duration `json:"circuit_breaker_interval" typescript:",notnull"` diff --git a/docs/reference/api/schemas.md b/docs/reference/api/schemas.md index 33a5b48440..213a6018cc 100644 --- a/docs/reference/api/schemas.md +++ b/docs/reference/api/schemas.md @@ -487,7 +487,7 @@ | `allow_byok` | boolean | false | | | | `anthropic` | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false | | Deprecated: Use Providers with indexed CODER_AIBRIDGE_PROVIDER__* env vars instead. | | `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | Deprecated: Use Providers with indexed CODER_AIBRIDGE_PROVIDER__* env vars instead. | -| `circuit_breaker_enabled` | boolean | false | | Circuit breaker protects against cascading failures from upstream AI provider rate limits (429, 503, 529 overloaded). | +| `circuit_breaker_enabled` | boolean | false | | Circuit breaker protects against cascading failures from upstream AI provider overload (503, 529). | | `circuit_breaker_failure_threshold` | integer | false | | | | `circuit_breaker_interval` | integer | false | | | | `circuit_breaker_max_requests` | integer | false | | | diff --git a/docs/reference/cli/server.md b/docs/reference/cli/server.md index 03c2f2c753..2aa7ce2242 100644 --- a/docs/reference/cli/server.md +++ b/docs/reference/cli/server.md @@ -1899,7 +1899,7 @@ Allow users to provide their own LLM API keys or subscriptions. When disabled, o | YAML | aibridge.circuit_breaker_enabled | | Default | false | -Enable the circuit breaker to protect against cascading failures from upstream AI provider rate limits (429, 503, 529 overloaded). +Enable the circuit breaker to protect against cascading failures from upstream AI provider overload (503, 529). ### --aibridge-proxy-enabled diff --git a/enterprise/aibridged/aibridged_integration_test.go b/enterprise/aibridged/aibridged_integration_test.go index 23579c4530..ca401cda3a 100644 --- a/enterprise/aibridged/aibridged_integration_test.go +++ b/enterprise/aibridged/aibridged_integration_test.go @@ -438,13 +438,13 @@ func TestIntegrationCircuitBreaker(t *testing.T) { registry := prometheus.NewRegistry() metrics := aibridge.NewMetrics(registry) - // Set up mock OpenAI server that always returns 429 Too Many Requests. + // Set up mock OpenAI server that always returns 503 Service Unavailable. mockOpenAI := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") // Disable SDK retries. w.Header().Set("x-should-retry", "false") - w.WriteHeader(http.StatusTooManyRequests) - _, _ = w.Write([]byte(`{"error":{"type":"rate_limit_error","message":"rate limited","code":"rate_limit_exceeded"}}`)) + w.WriteHeader(http.StatusServiceUnavailable) + _, _ = w.Write([]byte(`{"error":{"message":"Service Unavailable.","type":"cf_service_unavailable","code":503}}`)) })) t.Cleanup(mockOpenAI.Close) diff --git a/enterprise/cli/testdata/coder_server_--help.golden b/enterprise/cli/testdata/coder_server_--help.golden index 05a3790b16..a0cc791d54 100644 --- a/enterprise/cli/testdata/coder_server_--help.golden +++ b/enterprise/cli/testdata/coder_server_--help.golden @@ -139,7 +139,7 @@ AI BRIDGE OPTIONS: --aibridge-circuit-breaker-enabled bool, $CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED (default: false) Enable the circuit breaker to protect against cascading failures from - upstream AI provider rate limits (429, 503, 529 overloaded). + upstream AI provider overload (503, 529). --aibridge-retention duration, $CODER_AIBRIDGE_RETENTION (default: 60d) Length of time to retain data such as interceptions and all related diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index 8ebdad635b..75d83e5a9b 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -70,7 +70,7 @@ export interface AIBridgeConfig { readonly allow_byok: boolean; /** * Circuit breaker protects against cascading failures from upstream AI - * provider rate limits (429, 503, 529 overloaded). + * provider overload (503, 529). */ readonly circuit_breaker_enabled: boolean; readonly circuit_breaker_failure_threshold: number;