From dbb50ebaaf6eabafb6e75a825e168624f9e72fbd Mon Sep 17 00:00:00 2001 From: Susana Ferreira Date: Thu, 30 Apr 2026 09:31:32 +0100 Subject: [PATCH] feat: remove 429 from aibridge circuit breaker failure conditions (#24701) ## Description Removes 429 (Too Many Requests) from the circuit breaker failure conditions. Rate limiting is now handled by automatic key failover instead of tripping the circuit breaker. ## Changes `DefaultIsFailure` no longer treats 429 as a circuit breaker failure. The circuit breaker now only trips on server overload responses (503, 529). Tests and integration tests updated to use 503 instead of 429 for tripping circuits. Description strings in deployment config updated to reflect the change. Closes https://github.com/coder/internal/issues/1445 > [!NOTE] > Initially generated by Coder Agents, modified and reviewed by @ssncferreira --- aibridge/circuitbreaker/circuitbreaker.go | 12 +++--- .../circuitbreaker/circuitbreaker_test.go | 12 +++--- .../integrationtest/circuit_breaker_test.go | 42 +++++++++---------- aibridge/provider/anthropic_test.go | 2 +- cli/testdata/coder_server_--help.golden | 2 +- cli/testdata/server-config.yaml.golden | 2 +- coderd/apidoc/docs.go | 2 +- coderd/apidoc/swagger.json | 2 +- codersdk/deployment.go | 4 +- docs/reference/api/schemas.md | 2 +- docs/reference/cli/server.md | 2 +- .../aibridged/aibridged_integration_test.go | 6 +-- .../cli/testdata/coder_server_--help.golden | 2 +- site/src/api/typesGenerated.ts | 2 +- 14 files changed, 48 insertions(+), 46 deletions(-) diff --git a/aibridge/circuitbreaker/circuitbreaker.go b/aibridge/circuitbreaker/circuitbreaker.go index 0f0880b192..61a2f05627 100644 --- a/aibridge/circuitbreaker/circuitbreaker.go +++ b/aibridge/circuitbreaker/circuitbreaker.go @@ -20,13 +20,15 @@ import ( // and the request was rejected without calling the handler. var ErrCircuitOpen = xerrors.New("circuit breaker is open") -// DefaultIsFailure returns true for standard HTTP status codes that typically -// indicate upstream overload. +// DefaultIsFailure returns true for standard HTTP status codes that +// typically indicate upstream overload. +// +// Note: 429 (Too Many Requests) is intentionally excluded. Rate +// limits are key-specific and handled by automatic key failover. func DefaultIsFailure(statusCode int) bool { switch statusCode { - case http.StatusTooManyRequests, // 429 - http.StatusServiceUnavailable, // 503 - http.StatusGatewayTimeout: // 504 + case http.StatusServiceUnavailable, // 503 + http.StatusGatewayTimeout: // 504 return true default: return false diff --git a/aibridge/circuitbreaker/circuitbreaker_test.go b/aibridge/circuitbreaker/circuitbreaker_test.go index b80bfa2deb..57081e680a 100644 --- a/aibridge/circuitbreaker/circuitbreaker_test.go +++ b/aibridge/circuitbreaker/circuitbreaker_test.go @@ -32,11 +32,11 @@ func TestExecute_PerModelIsolation(t *testing.T) { sonnetModel := "claude-sonnet-4-20250514" haikuModel := "claude-3-5-haiku-20241022" - // Trip circuit on sonnet model (returns 429) + // Trip circuit on sonnet model (returns 503) w := httptest.NewRecorder() err := cbs.Execute(endpoint, sonnetModel, w, func(rw http.ResponseWriter) error { sonnetCalls.Add(1) - rw.WriteHeader(http.StatusTooManyRequests) + rw.WriteHeader(http.StatusServiceUnavailable) return nil }) assert.NoError(t, err) @@ -79,11 +79,11 @@ func TestExecute_PerEndpointIsolation(t *testing.T) { model := "test-model" - // Trip circuit on /v1/messages endpoint (returns 429) + // Trip circuit on /v1/messages endpoint (returns 503) w := httptest.NewRecorder() err := cbs.Execute("/v1/messages", model, w, func(rw http.ResponseWriter) error { messagesCalls.Add(1) - rw.WriteHeader(http.StatusTooManyRequests) + rw.WriteHeader(http.StatusServiceUnavailable) return nil }) assert.NoError(t, err) @@ -179,7 +179,7 @@ func TestExecute_OnStateChange(t *testing.T) { // Trip circuit w := httptest.NewRecorder() err := cbs.Execute(endpoint, model, w, func(rw http.ResponseWriter) error { - rw.WriteHeader(http.StatusTooManyRequests) + rw.WriteHeader(http.StatusServiceUnavailable) return nil }) assert.NoError(t, err) @@ -202,7 +202,7 @@ func TestDefaultIsFailure(t *testing.T) { {http.StatusOK, false}, {http.StatusBadRequest, false}, {http.StatusUnauthorized, false}, - {http.StatusTooManyRequests, true}, // 429 + {http.StatusTooManyRequests, false}, // 429: handled by key failover, not circuit breaker {http.StatusInternalServerError, false}, {http.StatusBadGateway, false}, {http.StatusServiceUnavailable, true}, // 503 diff --git a/aibridge/internal/integrationtest/circuit_breaker_test.go b/aibridge/internal/integrationtest/circuit_breaker_test.go index 3ace843275..c2619ec920 100644 --- a/aibridge/internal/integrationtest/circuit_breaker_test.go +++ b/aibridge/internal/integrationtest/circuit_breaker_test.go @@ -23,8 +23,8 @@ import ( // Common response bodies for circuit breaker tests. const ( - anthropicRateLimitError = `{"type":"error","error":{"type":"rate_limit_error","message":"rate limited"}}` - openAIRateLimitError = `{"error":{"type":"rate_limit_error","message":"rate limited","code":"rate_limit_exceeded"}}` + anthropicOverloadedError = `{"type":"error","error":{"type":"api_error","message":"Internal server error"}}` + openAIOverloadedError = `{"error":{"message":"Service Unavailable.","type":"cf_service_unavailable","code":503}}` ) func anthropicSuccessResponse(model string) string { @@ -59,7 +59,7 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) { expectProvider: config.ProviderAnthropic, expectEndpoint: "/v1/messages", expectModel: "claude-sonnet-4-20250514", - errorBody: anthropicRateLimitError, + errorBody: anthropicOverloadedError, successBody: anthropicSuccessResponse("claude-sonnet-4-20250514"), requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`, headers: http.Header{ @@ -80,7 +80,7 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) { expectProvider: config.ProviderOpenAI, expectEndpoint: "/v1/chat/completions", expectModel: "gpt-4o", - errorBody: openAIRateLimitError, + errorBody: openAIOverloadedError, successBody: openAISuccessResponse("gpt-4o"), requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`, headers: http.Header{"Authorization": {"Bearer test-key"}}, @@ -103,14 +103,14 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) { var shouldFail atomic.Bool shouldFail.Store(true) - // Mock upstream that returns 429 or 200 based on shouldFail flag. + // Mock upstream that returns 503 or 200 based on shouldFail flag. // x-should-retry: false is required to disable SDK automatic retries (default MaxRetries=2). mockUpstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { upstreamCalls.Add(1) w.Header().Set("Content-Type", "application/json") w.Header().Set("x-should-retry", "false") if shouldFail.Load() { - w.WriteHeader(http.StatusTooManyRequests) + w.WriteHeader(http.StatusServiceUnavailable) _, _ = w.Write([]byte(tc.errorBody)) } else { w.WriteHeader(http.StatusOK) @@ -146,10 +146,10 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) { } // Phase 1: Trip the circuit breaker - // First FailureThreshold requests hit upstream, get 429 + // First FailureThreshold requests hit upstream, get 503 for i := uint32(0); i < cbConfig.FailureThreshold; i++ { status := doRequest() - assert.Equal(t, http.StatusTooManyRequests, status) + assert.Equal(t, http.StatusServiceUnavailable, status) } //nolint:gosec // G115: test constant, no overflow risk assert.Equal(t, int32(cbConfig.FailureThreshold), upstreamCalls.Load()) @@ -227,7 +227,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) { expectProvider: config.ProviderAnthropic, expectEndpoint: "/v1/messages", expectModel: "claude-sonnet-4-20250514", - errorBody: anthropicRateLimitError, + errorBody: anthropicOverloadedError, requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`, headers: http.Header{ "x-api-key": {"test"}, @@ -247,7 +247,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) { expectProvider: config.ProviderOpenAI, expectEndpoint: "/v1/chat/completions", expectModel: "gpt-4o", - errorBody: openAIRateLimitError, + errorBody: openAIOverloadedError, requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`, headers: http.Header{"Authorization": {"Bearer test-key"}}, path: pathOpenAIChatCompletions, @@ -267,12 +267,12 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) { var upstreamCalls atomic.Int32 - // Mock upstream that always returns 429. + // Mock upstream that always returns 503. mockUpstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { upstreamCalls.Add(1) w.Header().Set("Content-Type", "application/json") w.Header().Set("x-should-retry", "false") - w.WriteHeader(http.StatusTooManyRequests) + w.WriteHeader(http.StatusServiceUnavailable) _, _ = w.Write([]byte(tc.errorBody)) })) defer mockUpstream.Close() @@ -305,7 +305,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) { // Phase 1: Trip the circuit for i := uint32(0); i < cbConfig.FailureThreshold; i++ { status := doRequest() - assert.Equal(t, http.StatusTooManyRequests, status) + assert.Equal(t, http.StatusServiceUnavailable, status) } // Verify circuit is open @@ -321,7 +321,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) { // Phase 3: Request in half-open state fails, circuit should re-open upstreamCallsBefore := upstreamCalls.Load() status = doRequest() - assert.Equal(t, http.StatusTooManyRequests, status, "Request should fail in half-open state") + assert.Equal(t, http.StatusServiceUnavailable, status, "Request should fail in half-open state") assert.Equal(t, upstreamCallsBefore+1, upstreamCalls.Load(), "Request should reach upstream in half-open state") // Circuit should be open again - next request should be rejected immediately @@ -363,7 +363,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) { expectProvider: config.ProviderAnthropic, expectEndpoint: "/v1/messages", expectModel: "claude-sonnet-4-20250514", - errorBody: anthropicRateLimitError, + errorBody: anthropicOverloadedError, successBody: anthropicSuccessResponse("claude-sonnet-4-20250514"), requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`, headers: http.Header{ @@ -384,7 +384,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) { expectProvider: config.ProviderOpenAI, expectEndpoint: "/v1/chat/completions", expectModel: "gpt-4o", - errorBody: openAIRateLimitError, + errorBody: openAIOverloadedError, successBody: openAISuccessResponse("gpt-4o"), requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`, headers: http.Header{"Authorization": {"Bearer test-key"}}, @@ -413,7 +413,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) { w.Header().Set("Content-Type", "application/json") w.Header().Set("x-should-retry", "false") if shouldFail.Load() { - w.WriteHeader(http.StatusTooManyRequests) + w.WriteHeader(http.StatusServiceUnavailable) _, _ = w.Write([]byte(tc.errorBody)) } else { // Slow response to ensure requests overlap @@ -453,7 +453,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) { // Phase 1: Trip the circuit for i := uint32(0); i < cbConfig.FailureThreshold; i++ { status := doRequest() - assert.Equal(t, http.StatusTooManyRequests, status) + assert.Equal(t, http.StatusServiceUnavailable, status) } // Verify circuit is open @@ -529,8 +529,8 @@ func TestCircuitBreaker_PerModelIsolation(t *testing.T) { if strings.Contains(string(body), "claude-sonnet-4-20250514") { sonnetCalls.Add(1) if sonnetShouldFail.Load() { - w.WriteHeader(http.StatusTooManyRequests) - _, _ = w.Write([]byte(anthropicRateLimitError)) + w.WriteHeader(http.StatusServiceUnavailable) + _, _ = w.Write([]byte(anthropicOverloadedError)) } else { w.WriteHeader(http.StatusOK) _, _ = w.Write([]byte(anthropicSuccessResponse("claude-sonnet-4-20250514"))) @@ -578,7 +578,7 @@ func TestCircuitBreaker_PerModelIsolation(t *testing.T) { // Phase 1: Trip the circuit for sonnet model for i := uint32(0); i < cbConfig.FailureThreshold; i++ { status := doRequest("claude-sonnet-4-20250514") - assert.Equal(t, http.StatusTooManyRequests, status) + assert.Equal(t, http.StatusServiceUnavailable, status) } //nolint:gosec // G115: test constant, no overflow risk assert.Equal(t, int32(cbConfig.FailureThreshold), sonnetCalls.Load()) diff --git a/aibridge/provider/anthropic_test.go b/aibridge/provider/anthropic_test.go index 75eef0e0d0..a4ea0c21e2 100644 --- a/aibridge/provider/anthropic_test.go +++ b/aibridge/provider/anthropic_test.go @@ -360,7 +360,7 @@ func Test_anthropicIsFailure(t *testing.T) { {http.StatusOK, false}, {http.StatusBadRequest, false}, {http.StatusUnauthorized, false}, - {http.StatusTooManyRequests, true}, // 429 + {http.StatusTooManyRequests, false}, // 429: handled by key failover, not circuit breaker {http.StatusInternalServerError, false}, {http.StatusBadGateway, false}, {http.StatusServiceUnavailable, true}, // 503 diff --git a/cli/testdata/coder_server_--help.golden b/cli/testdata/coder_server_--help.golden index 286dfc0b1b..2862a45c3b 100644 --- a/cli/testdata/coder_server_--help.golden +++ b/cli/testdata/coder_server_--help.golden @@ -138,7 +138,7 @@ AI BRIDGE OPTIONS: --aibridge-circuit-breaker-enabled bool, $CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED (default: false) Enable the circuit breaker to protect against cascading failures from - upstream AI provider rate limits (429, 503, 529 overloaded). + upstream AI provider overload (503, 529). --aibridge-retention duration, $CODER_AIBRIDGE_RETENTION (default: 60d) Length of time to retain data such as interceptions and all related diff --git a/cli/testdata/server-config.yaml.golden b/cli/testdata/server-config.yaml.golden index abc5faa877..22ad14e506 100644 --- a/cli/testdata/server-config.yaml.golden +++ b/cli/testdata/server-config.yaml.golden @@ -821,7 +821,7 @@ aibridge: # (default: true, type: bool) allow_byok: true # Enable the circuit breaker to protect against cascading failures from upstream - # AI provider rate limits (429, 503, 529 overloaded). + # AI provider overload (503, 529). # (default: false, type: bool) circuit_breaker_enabled: false # Number of consecutive failures that triggers the circuit breaker to open. diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index bec25890ff..0c9ef6f930 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -13138,7 +13138,7 @@ const docTemplate = `{ ] }, "circuit_breaker_enabled": { - "description": "Circuit breaker protects against cascading failures from upstream AI\nprovider rate limits (429, 503, 529 overloaded).", + "description": "Circuit breaker protects against cascading failures from upstream AI\nprovider overload (503, 529).", "type": "boolean" }, "circuit_breaker_failure_threshold": { diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 356d14b6df..956fe5c8ef 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -11686,7 +11686,7 @@ ] }, "circuit_breaker_enabled": { - "description": "Circuit breaker protects against cascading failures from upstream AI\nprovider rate limits (429, 503, 529 overloaded).", + "description": "Circuit breaker protects against cascading failures from upstream AI\nprovider overload (503, 529).", "type": "boolean" }, "circuit_breaker_failure_threshold": { diff --git a/codersdk/deployment.go b/codersdk/deployment.go index be2f738ceb..c7b17b4235 100644 --- a/codersdk/deployment.go +++ b/codersdk/deployment.go @@ -3823,7 +3823,7 @@ Write out the current server config as YAML to stdout.`, }, { Name: "AI Bridge Circuit Breaker Enabled", - Description: "Enable the circuit breaker to protect against cascading failures from upstream AI provider rate limits (429, 503, 529 overloaded).", + Description: "Enable the circuit breaker to protect against cascading failures from upstream AI provider overload (503, 529).", Flag: "aibridge-circuit-breaker-enabled", Env: "CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED", Value: &c.AI.BridgeConfig.CircuitBreakerEnabled, @@ -4074,7 +4074,7 @@ type AIBridgeConfig struct { SendActorHeaders serpent.Bool `json:"send_actor_headers" typescript:",notnull"` AllowBYOK serpent.Bool `json:"allow_byok" typescript:",notnull"` // Circuit breaker protects against cascading failures from upstream AI - // provider rate limits (429, 503, 529 overloaded). + // provider overload (503, 529). CircuitBreakerEnabled serpent.Bool `json:"circuit_breaker_enabled" typescript:",notnull"` CircuitBreakerFailureThreshold serpent.Int64 `json:"circuit_breaker_failure_threshold" typescript:",notnull"` CircuitBreakerInterval serpent.Duration `json:"circuit_breaker_interval" typescript:",notnull"` diff --git a/docs/reference/api/schemas.md b/docs/reference/api/schemas.md index 33a5b48440..213a6018cc 100644 --- a/docs/reference/api/schemas.md +++ b/docs/reference/api/schemas.md @@ -487,7 +487,7 @@ | `allow_byok` | boolean | false | | | | `anthropic` | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false | | Deprecated: Use Providers with indexed CODER_AIBRIDGE_PROVIDER__* env vars instead. | | `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | Deprecated: Use Providers with indexed CODER_AIBRIDGE_PROVIDER__* env vars instead. | -| `circuit_breaker_enabled` | boolean | false | | Circuit breaker protects against cascading failures from upstream AI provider rate limits (429, 503, 529 overloaded). | +| `circuit_breaker_enabled` | boolean | false | | Circuit breaker protects against cascading failures from upstream AI provider overload (503, 529). | | `circuit_breaker_failure_threshold` | integer | false | | | | `circuit_breaker_interval` | integer | false | | | | `circuit_breaker_max_requests` | integer | false | | | diff --git a/docs/reference/cli/server.md b/docs/reference/cli/server.md index 03c2f2c753..2aa7ce2242 100644 --- a/docs/reference/cli/server.md +++ b/docs/reference/cli/server.md @@ -1899,7 +1899,7 @@ Allow users to provide their own LLM API keys or subscriptions. When disabled, o | YAML | aibridge.circuit_breaker_enabled | | Default | false | -Enable the circuit breaker to protect against cascading failures from upstream AI provider rate limits (429, 503, 529 overloaded). +Enable the circuit breaker to protect against cascading failures from upstream AI provider overload (503, 529). ### --aibridge-proxy-enabled diff --git a/enterprise/aibridged/aibridged_integration_test.go b/enterprise/aibridged/aibridged_integration_test.go index 23579c4530..ca401cda3a 100644 --- a/enterprise/aibridged/aibridged_integration_test.go +++ b/enterprise/aibridged/aibridged_integration_test.go @@ -438,13 +438,13 @@ func TestIntegrationCircuitBreaker(t *testing.T) { registry := prometheus.NewRegistry() metrics := aibridge.NewMetrics(registry) - // Set up mock OpenAI server that always returns 429 Too Many Requests. + // Set up mock OpenAI server that always returns 503 Service Unavailable. mockOpenAI := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") // Disable SDK retries. w.Header().Set("x-should-retry", "false") - w.WriteHeader(http.StatusTooManyRequests) - _, _ = w.Write([]byte(`{"error":{"type":"rate_limit_error","message":"rate limited","code":"rate_limit_exceeded"}}`)) + w.WriteHeader(http.StatusServiceUnavailable) + _, _ = w.Write([]byte(`{"error":{"message":"Service Unavailable.","type":"cf_service_unavailable","code":503}}`)) })) t.Cleanup(mockOpenAI.Close) diff --git a/enterprise/cli/testdata/coder_server_--help.golden b/enterprise/cli/testdata/coder_server_--help.golden index 05a3790b16..a0cc791d54 100644 --- a/enterprise/cli/testdata/coder_server_--help.golden +++ b/enterprise/cli/testdata/coder_server_--help.golden @@ -139,7 +139,7 @@ AI BRIDGE OPTIONS: --aibridge-circuit-breaker-enabled bool, $CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED (default: false) Enable the circuit breaker to protect against cascading failures from - upstream AI provider rate limits (429, 503, 529 overloaded). + upstream AI provider overload (503, 529). --aibridge-retention duration, $CODER_AIBRIDGE_RETENTION (default: 60d) Length of time to retain data such as interceptions and all related diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index 8ebdad635b..75d83e5a9b 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -70,7 +70,7 @@ export interface AIBridgeConfig { readonly allow_byok: boolean; /** * Circuit breaker protects against cascading failures from upstream AI - * provider rate limits (429, 503, 529 overloaded). + * provider overload (503, 529). */ readonly circuit_breaker_enabled: boolean; readonly circuit_breaker_failure_threshold: number;