mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
feat: remove 429 from aibridge circuit breaker failure conditions (#24701)
## Description Removes 429 (Too Many Requests) from the circuit breaker failure conditions. Rate limiting is now handled by automatic key failover instead of tripping the circuit breaker. ## Changes `DefaultIsFailure` no longer treats 429 as a circuit breaker failure. The circuit breaker now only trips on server overload responses (503, 529). Tests and integration tests updated to use 503 instead of 429 for tripping circuits. Description strings in deployment config updated to reflect the change. Closes https://github.com/coder/internal/issues/1445 > [!NOTE] > Initially generated by Coder Agents, modified and reviewed by @ssncferreira
This commit is contained in:
@@ -20,13 +20,15 @@ import (
|
|||||||
// and the request was rejected without calling the handler.
|
// and the request was rejected without calling the handler.
|
||||||
var ErrCircuitOpen = xerrors.New("circuit breaker is open")
|
var ErrCircuitOpen = xerrors.New("circuit breaker is open")
|
||||||
|
|
||||||
// DefaultIsFailure returns true for standard HTTP status codes that typically
|
// DefaultIsFailure returns true for standard HTTP status codes that
|
||||||
// indicate upstream overload.
|
// typically indicate upstream overload.
|
||||||
|
//
|
||||||
|
// Note: 429 (Too Many Requests) is intentionally excluded. Rate
|
||||||
|
// limits are key-specific and handled by automatic key failover.
|
||||||
func DefaultIsFailure(statusCode int) bool {
|
func DefaultIsFailure(statusCode int) bool {
|
||||||
switch statusCode {
|
switch statusCode {
|
||||||
case http.StatusTooManyRequests, // 429
|
case http.StatusServiceUnavailable, // 503
|
||||||
http.StatusServiceUnavailable, // 503
|
http.StatusGatewayTimeout: // 504
|
||||||
http.StatusGatewayTimeout: // 504
|
|
||||||
return true
|
return true
|
||||||
default:
|
default:
|
||||||
return false
|
return false
|
||||||
|
|||||||
@@ -32,11 +32,11 @@ func TestExecute_PerModelIsolation(t *testing.T) {
|
|||||||
sonnetModel := "claude-sonnet-4-20250514"
|
sonnetModel := "claude-sonnet-4-20250514"
|
||||||
haikuModel := "claude-3-5-haiku-20241022"
|
haikuModel := "claude-3-5-haiku-20241022"
|
||||||
|
|
||||||
// Trip circuit on sonnet model (returns 429)
|
// Trip circuit on sonnet model (returns 503)
|
||||||
w := httptest.NewRecorder()
|
w := httptest.NewRecorder()
|
||||||
err := cbs.Execute(endpoint, sonnetModel, w, func(rw http.ResponseWriter) error {
|
err := cbs.Execute(endpoint, sonnetModel, w, func(rw http.ResponseWriter) error {
|
||||||
sonnetCalls.Add(1)
|
sonnetCalls.Add(1)
|
||||||
rw.WriteHeader(http.StatusTooManyRequests)
|
rw.WriteHeader(http.StatusServiceUnavailable)
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
@@ -79,11 +79,11 @@ func TestExecute_PerEndpointIsolation(t *testing.T) {
|
|||||||
|
|
||||||
model := "test-model"
|
model := "test-model"
|
||||||
|
|
||||||
// Trip circuit on /v1/messages endpoint (returns 429)
|
// Trip circuit on /v1/messages endpoint (returns 503)
|
||||||
w := httptest.NewRecorder()
|
w := httptest.NewRecorder()
|
||||||
err := cbs.Execute("/v1/messages", model, w, func(rw http.ResponseWriter) error {
|
err := cbs.Execute("/v1/messages", model, w, func(rw http.ResponseWriter) error {
|
||||||
messagesCalls.Add(1)
|
messagesCalls.Add(1)
|
||||||
rw.WriteHeader(http.StatusTooManyRequests)
|
rw.WriteHeader(http.StatusServiceUnavailable)
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
@@ -179,7 +179,7 @@ func TestExecute_OnStateChange(t *testing.T) {
|
|||||||
// Trip circuit
|
// Trip circuit
|
||||||
w := httptest.NewRecorder()
|
w := httptest.NewRecorder()
|
||||||
err := cbs.Execute(endpoint, model, w, func(rw http.ResponseWriter) error {
|
err := cbs.Execute(endpoint, model, w, func(rw http.ResponseWriter) error {
|
||||||
rw.WriteHeader(http.StatusTooManyRequests)
|
rw.WriteHeader(http.StatusServiceUnavailable)
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
@@ -202,7 +202,7 @@ func TestDefaultIsFailure(t *testing.T) {
|
|||||||
{http.StatusOK, false},
|
{http.StatusOK, false},
|
||||||
{http.StatusBadRequest, false},
|
{http.StatusBadRequest, false},
|
||||||
{http.StatusUnauthorized, false},
|
{http.StatusUnauthorized, false},
|
||||||
{http.StatusTooManyRequests, true}, // 429
|
{http.StatusTooManyRequests, false}, // 429: handled by key failover, not circuit breaker
|
||||||
{http.StatusInternalServerError, false},
|
{http.StatusInternalServerError, false},
|
||||||
{http.StatusBadGateway, false},
|
{http.StatusBadGateway, false},
|
||||||
{http.StatusServiceUnavailable, true}, // 503
|
{http.StatusServiceUnavailable, true}, // 503
|
||||||
|
|||||||
@@ -23,8 +23,8 @@ import (
|
|||||||
|
|
||||||
// Common response bodies for circuit breaker tests.
|
// Common response bodies for circuit breaker tests.
|
||||||
const (
|
const (
|
||||||
anthropicRateLimitError = `{"type":"error","error":{"type":"rate_limit_error","message":"rate limited"}}`
|
anthropicOverloadedError = `{"type":"error","error":{"type":"api_error","message":"Internal server error"}}`
|
||||||
openAIRateLimitError = `{"error":{"type":"rate_limit_error","message":"rate limited","code":"rate_limit_exceeded"}}`
|
openAIOverloadedError = `{"error":{"message":"Service Unavailable.","type":"cf_service_unavailable","code":503}}`
|
||||||
)
|
)
|
||||||
|
|
||||||
func anthropicSuccessResponse(model string) string {
|
func anthropicSuccessResponse(model string) string {
|
||||||
@@ -59,7 +59,7 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) {
|
|||||||
expectProvider: config.ProviderAnthropic,
|
expectProvider: config.ProviderAnthropic,
|
||||||
expectEndpoint: "/v1/messages",
|
expectEndpoint: "/v1/messages",
|
||||||
expectModel: "claude-sonnet-4-20250514",
|
expectModel: "claude-sonnet-4-20250514",
|
||||||
errorBody: anthropicRateLimitError,
|
errorBody: anthropicOverloadedError,
|
||||||
successBody: anthropicSuccessResponse("claude-sonnet-4-20250514"),
|
successBody: anthropicSuccessResponse("claude-sonnet-4-20250514"),
|
||||||
requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`,
|
requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`,
|
||||||
headers: http.Header{
|
headers: http.Header{
|
||||||
@@ -80,7 +80,7 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) {
|
|||||||
expectProvider: config.ProviderOpenAI,
|
expectProvider: config.ProviderOpenAI,
|
||||||
expectEndpoint: "/v1/chat/completions",
|
expectEndpoint: "/v1/chat/completions",
|
||||||
expectModel: "gpt-4o",
|
expectModel: "gpt-4o",
|
||||||
errorBody: openAIRateLimitError,
|
errorBody: openAIOverloadedError,
|
||||||
successBody: openAISuccessResponse("gpt-4o"),
|
successBody: openAISuccessResponse("gpt-4o"),
|
||||||
requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`,
|
requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`,
|
||||||
headers: http.Header{"Authorization": {"Bearer test-key"}},
|
headers: http.Header{"Authorization": {"Bearer test-key"}},
|
||||||
@@ -103,14 +103,14 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) {
|
|||||||
var shouldFail atomic.Bool
|
var shouldFail atomic.Bool
|
||||||
shouldFail.Store(true)
|
shouldFail.Store(true)
|
||||||
|
|
||||||
// Mock upstream that returns 429 or 200 based on shouldFail flag.
|
// Mock upstream that returns 503 or 200 based on shouldFail flag.
|
||||||
// x-should-retry: false is required to disable SDK automatic retries (default MaxRetries=2).
|
// x-should-retry: false is required to disable SDK automatic retries (default MaxRetries=2).
|
||||||
mockUpstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
mockUpstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
upstreamCalls.Add(1)
|
upstreamCalls.Add(1)
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
w.Header().Set("x-should-retry", "false")
|
w.Header().Set("x-should-retry", "false")
|
||||||
if shouldFail.Load() {
|
if shouldFail.Load() {
|
||||||
w.WriteHeader(http.StatusTooManyRequests)
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
_, _ = w.Write([]byte(tc.errorBody))
|
_, _ = w.Write([]byte(tc.errorBody))
|
||||||
} else {
|
} else {
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
@@ -146,10 +146,10 @@ func TestCircuitBreaker_FullRecoveryCycle(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Phase 1: Trip the circuit breaker
|
// Phase 1: Trip the circuit breaker
|
||||||
// First FailureThreshold requests hit upstream, get 429
|
// First FailureThreshold requests hit upstream, get 503
|
||||||
for i := uint32(0); i < cbConfig.FailureThreshold; i++ {
|
for i := uint32(0); i < cbConfig.FailureThreshold; i++ {
|
||||||
status := doRequest()
|
status := doRequest()
|
||||||
assert.Equal(t, http.StatusTooManyRequests, status)
|
assert.Equal(t, http.StatusServiceUnavailable, status)
|
||||||
}
|
}
|
||||||
//nolint:gosec // G115: test constant, no overflow risk
|
//nolint:gosec // G115: test constant, no overflow risk
|
||||||
assert.Equal(t, int32(cbConfig.FailureThreshold), upstreamCalls.Load())
|
assert.Equal(t, int32(cbConfig.FailureThreshold), upstreamCalls.Load())
|
||||||
@@ -227,7 +227,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) {
|
|||||||
expectProvider: config.ProviderAnthropic,
|
expectProvider: config.ProviderAnthropic,
|
||||||
expectEndpoint: "/v1/messages",
|
expectEndpoint: "/v1/messages",
|
||||||
expectModel: "claude-sonnet-4-20250514",
|
expectModel: "claude-sonnet-4-20250514",
|
||||||
errorBody: anthropicRateLimitError,
|
errorBody: anthropicOverloadedError,
|
||||||
requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`,
|
requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`,
|
||||||
headers: http.Header{
|
headers: http.Header{
|
||||||
"x-api-key": {"test"},
|
"x-api-key": {"test"},
|
||||||
@@ -247,7 +247,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) {
|
|||||||
expectProvider: config.ProviderOpenAI,
|
expectProvider: config.ProviderOpenAI,
|
||||||
expectEndpoint: "/v1/chat/completions",
|
expectEndpoint: "/v1/chat/completions",
|
||||||
expectModel: "gpt-4o",
|
expectModel: "gpt-4o",
|
||||||
errorBody: openAIRateLimitError,
|
errorBody: openAIOverloadedError,
|
||||||
requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`,
|
requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`,
|
||||||
headers: http.Header{"Authorization": {"Bearer test-key"}},
|
headers: http.Header{"Authorization": {"Bearer test-key"}},
|
||||||
path: pathOpenAIChatCompletions,
|
path: pathOpenAIChatCompletions,
|
||||||
@@ -267,12 +267,12 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) {
|
|||||||
|
|
||||||
var upstreamCalls atomic.Int32
|
var upstreamCalls atomic.Int32
|
||||||
|
|
||||||
// Mock upstream that always returns 429.
|
// Mock upstream that always returns 503.
|
||||||
mockUpstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
mockUpstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
upstreamCalls.Add(1)
|
upstreamCalls.Add(1)
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
w.Header().Set("x-should-retry", "false")
|
w.Header().Set("x-should-retry", "false")
|
||||||
w.WriteHeader(http.StatusTooManyRequests)
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
_, _ = w.Write([]byte(tc.errorBody))
|
_, _ = w.Write([]byte(tc.errorBody))
|
||||||
}))
|
}))
|
||||||
defer mockUpstream.Close()
|
defer mockUpstream.Close()
|
||||||
@@ -305,7 +305,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) {
|
|||||||
// Phase 1: Trip the circuit
|
// Phase 1: Trip the circuit
|
||||||
for i := uint32(0); i < cbConfig.FailureThreshold; i++ {
|
for i := uint32(0); i < cbConfig.FailureThreshold; i++ {
|
||||||
status := doRequest()
|
status := doRequest()
|
||||||
assert.Equal(t, http.StatusTooManyRequests, status)
|
assert.Equal(t, http.StatusServiceUnavailable, status)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify circuit is open
|
// Verify circuit is open
|
||||||
@@ -321,7 +321,7 @@ func TestCircuitBreaker_HalfOpenFailure(t *testing.T) {
|
|||||||
// Phase 3: Request in half-open state fails, circuit should re-open
|
// Phase 3: Request in half-open state fails, circuit should re-open
|
||||||
upstreamCallsBefore := upstreamCalls.Load()
|
upstreamCallsBefore := upstreamCalls.Load()
|
||||||
status = doRequest()
|
status = doRequest()
|
||||||
assert.Equal(t, http.StatusTooManyRequests, status, "Request should fail in half-open state")
|
assert.Equal(t, http.StatusServiceUnavailable, status, "Request should fail in half-open state")
|
||||||
assert.Equal(t, upstreamCallsBefore+1, upstreamCalls.Load(), "Request should reach upstream in half-open state")
|
assert.Equal(t, upstreamCallsBefore+1, upstreamCalls.Load(), "Request should reach upstream in half-open state")
|
||||||
|
|
||||||
// Circuit should be open again - next request should be rejected immediately
|
// Circuit should be open again - next request should be rejected immediately
|
||||||
@@ -363,7 +363,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) {
|
|||||||
expectProvider: config.ProviderAnthropic,
|
expectProvider: config.ProviderAnthropic,
|
||||||
expectEndpoint: "/v1/messages",
|
expectEndpoint: "/v1/messages",
|
||||||
expectModel: "claude-sonnet-4-20250514",
|
expectModel: "claude-sonnet-4-20250514",
|
||||||
errorBody: anthropicRateLimitError,
|
errorBody: anthropicOverloadedError,
|
||||||
successBody: anthropicSuccessResponse("claude-sonnet-4-20250514"),
|
successBody: anthropicSuccessResponse("claude-sonnet-4-20250514"),
|
||||||
requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`,
|
requestBody: `{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hi"}]}`,
|
||||||
headers: http.Header{
|
headers: http.Header{
|
||||||
@@ -384,7 +384,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) {
|
|||||||
expectProvider: config.ProviderOpenAI,
|
expectProvider: config.ProviderOpenAI,
|
||||||
expectEndpoint: "/v1/chat/completions",
|
expectEndpoint: "/v1/chat/completions",
|
||||||
expectModel: "gpt-4o",
|
expectModel: "gpt-4o",
|
||||||
errorBody: openAIRateLimitError,
|
errorBody: openAIOverloadedError,
|
||||||
successBody: openAISuccessResponse("gpt-4o"),
|
successBody: openAISuccessResponse("gpt-4o"),
|
||||||
requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`,
|
requestBody: `{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}`,
|
||||||
headers: http.Header{"Authorization": {"Bearer test-key"}},
|
headers: http.Header{"Authorization": {"Bearer test-key"}},
|
||||||
@@ -413,7 +413,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) {
|
|||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
w.Header().Set("x-should-retry", "false")
|
w.Header().Set("x-should-retry", "false")
|
||||||
if shouldFail.Load() {
|
if shouldFail.Load() {
|
||||||
w.WriteHeader(http.StatusTooManyRequests)
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
_, _ = w.Write([]byte(tc.errorBody))
|
_, _ = w.Write([]byte(tc.errorBody))
|
||||||
} else {
|
} else {
|
||||||
// Slow response to ensure requests overlap
|
// Slow response to ensure requests overlap
|
||||||
@@ -453,7 +453,7 @@ func TestCircuitBreaker_HalfOpenMaxRequests(t *testing.T) {
|
|||||||
// Phase 1: Trip the circuit
|
// Phase 1: Trip the circuit
|
||||||
for i := uint32(0); i < cbConfig.FailureThreshold; i++ {
|
for i := uint32(0); i < cbConfig.FailureThreshold; i++ {
|
||||||
status := doRequest()
|
status := doRequest()
|
||||||
assert.Equal(t, http.StatusTooManyRequests, status)
|
assert.Equal(t, http.StatusServiceUnavailable, status)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify circuit is open
|
// Verify circuit is open
|
||||||
@@ -529,8 +529,8 @@ func TestCircuitBreaker_PerModelIsolation(t *testing.T) {
|
|||||||
if strings.Contains(string(body), "claude-sonnet-4-20250514") {
|
if strings.Contains(string(body), "claude-sonnet-4-20250514") {
|
||||||
sonnetCalls.Add(1)
|
sonnetCalls.Add(1)
|
||||||
if sonnetShouldFail.Load() {
|
if sonnetShouldFail.Load() {
|
||||||
w.WriteHeader(http.StatusTooManyRequests)
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
_, _ = w.Write([]byte(anthropicRateLimitError))
|
_, _ = w.Write([]byte(anthropicOverloadedError))
|
||||||
} else {
|
} else {
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
_, _ = w.Write([]byte(anthropicSuccessResponse("claude-sonnet-4-20250514")))
|
_, _ = w.Write([]byte(anthropicSuccessResponse("claude-sonnet-4-20250514")))
|
||||||
@@ -578,7 +578,7 @@ func TestCircuitBreaker_PerModelIsolation(t *testing.T) {
|
|||||||
// Phase 1: Trip the circuit for sonnet model
|
// Phase 1: Trip the circuit for sonnet model
|
||||||
for i := uint32(0); i < cbConfig.FailureThreshold; i++ {
|
for i := uint32(0); i < cbConfig.FailureThreshold; i++ {
|
||||||
status := doRequest("claude-sonnet-4-20250514")
|
status := doRequest("claude-sonnet-4-20250514")
|
||||||
assert.Equal(t, http.StatusTooManyRequests, status)
|
assert.Equal(t, http.StatusServiceUnavailable, status)
|
||||||
}
|
}
|
||||||
//nolint:gosec // G115: test constant, no overflow risk
|
//nolint:gosec // G115: test constant, no overflow risk
|
||||||
assert.Equal(t, int32(cbConfig.FailureThreshold), sonnetCalls.Load())
|
assert.Equal(t, int32(cbConfig.FailureThreshold), sonnetCalls.Load())
|
||||||
|
|||||||
@@ -360,7 +360,7 @@ func Test_anthropicIsFailure(t *testing.T) {
|
|||||||
{http.StatusOK, false},
|
{http.StatusOK, false},
|
||||||
{http.StatusBadRequest, false},
|
{http.StatusBadRequest, false},
|
||||||
{http.StatusUnauthorized, false},
|
{http.StatusUnauthorized, false},
|
||||||
{http.StatusTooManyRequests, true}, // 429
|
{http.StatusTooManyRequests, false}, // 429: handled by key failover, not circuit breaker
|
||||||
{http.StatusInternalServerError, false},
|
{http.StatusInternalServerError, false},
|
||||||
{http.StatusBadGateway, false},
|
{http.StatusBadGateway, false},
|
||||||
{http.StatusServiceUnavailable, true}, // 503
|
{http.StatusServiceUnavailable, true}, // 503
|
||||||
|
|||||||
+1
-1
@@ -138,7 +138,7 @@ AI BRIDGE OPTIONS:
|
|||||||
|
|
||||||
--aibridge-circuit-breaker-enabled bool, $CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED (default: false)
|
--aibridge-circuit-breaker-enabled bool, $CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED (default: false)
|
||||||
Enable the circuit breaker to protect against cascading failures from
|
Enable the circuit breaker to protect against cascading failures from
|
||||||
upstream AI provider rate limits (429, 503, 529 overloaded).
|
upstream AI provider overload (503, 529).
|
||||||
|
|
||||||
--aibridge-retention duration, $CODER_AIBRIDGE_RETENTION (default: 60d)
|
--aibridge-retention duration, $CODER_AIBRIDGE_RETENTION (default: 60d)
|
||||||
Length of time to retain data such as interceptions and all related
|
Length of time to retain data such as interceptions and all related
|
||||||
|
|||||||
+1
-1
@@ -821,7 +821,7 @@ aibridge:
|
|||||||
# (default: true, type: bool)
|
# (default: true, type: bool)
|
||||||
allow_byok: true
|
allow_byok: true
|
||||||
# Enable the circuit breaker to protect against cascading failures from upstream
|
# Enable the circuit breaker to protect against cascading failures from upstream
|
||||||
# AI provider rate limits (429, 503, 529 overloaded).
|
# AI provider overload (503, 529).
|
||||||
# (default: false, type: bool)
|
# (default: false, type: bool)
|
||||||
circuit_breaker_enabled: false
|
circuit_breaker_enabled: false
|
||||||
# Number of consecutive failures that triggers the circuit breaker to open.
|
# Number of consecutive failures that triggers the circuit breaker to open.
|
||||||
|
|||||||
Generated
+1
-1
@@ -13138,7 +13138,7 @@ const docTemplate = `{
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"circuit_breaker_enabled": {
|
"circuit_breaker_enabled": {
|
||||||
"description": "Circuit breaker protects against cascading failures from upstream AI\nprovider rate limits (429, 503, 529 overloaded).",
|
"description": "Circuit breaker protects against cascading failures from upstream AI\nprovider overload (503, 529).",
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
"circuit_breaker_failure_threshold": {
|
"circuit_breaker_failure_threshold": {
|
||||||
|
|||||||
Generated
+1
-1
@@ -11686,7 +11686,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"circuit_breaker_enabled": {
|
"circuit_breaker_enabled": {
|
||||||
"description": "Circuit breaker protects against cascading failures from upstream AI\nprovider rate limits (429, 503, 529 overloaded).",
|
"description": "Circuit breaker protects against cascading failures from upstream AI\nprovider overload (503, 529).",
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
"circuit_breaker_failure_threshold": {
|
"circuit_breaker_failure_threshold": {
|
||||||
|
|||||||
@@ -3823,7 +3823,7 @@ Write out the current server config as YAML to stdout.`,
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "AI Bridge Circuit Breaker Enabled",
|
Name: "AI Bridge Circuit Breaker Enabled",
|
||||||
Description: "Enable the circuit breaker to protect against cascading failures from upstream AI provider rate limits (429, 503, 529 overloaded).",
|
Description: "Enable the circuit breaker to protect against cascading failures from upstream AI provider overload (503, 529).",
|
||||||
Flag: "aibridge-circuit-breaker-enabled",
|
Flag: "aibridge-circuit-breaker-enabled",
|
||||||
Env: "CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED",
|
Env: "CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED",
|
||||||
Value: &c.AI.BridgeConfig.CircuitBreakerEnabled,
|
Value: &c.AI.BridgeConfig.CircuitBreakerEnabled,
|
||||||
@@ -4074,7 +4074,7 @@ type AIBridgeConfig struct {
|
|||||||
SendActorHeaders serpent.Bool `json:"send_actor_headers" typescript:",notnull"`
|
SendActorHeaders serpent.Bool `json:"send_actor_headers" typescript:",notnull"`
|
||||||
AllowBYOK serpent.Bool `json:"allow_byok" typescript:",notnull"`
|
AllowBYOK serpent.Bool `json:"allow_byok" typescript:",notnull"`
|
||||||
// Circuit breaker protects against cascading failures from upstream AI
|
// Circuit breaker protects against cascading failures from upstream AI
|
||||||
// provider rate limits (429, 503, 529 overloaded).
|
// provider overload (503, 529).
|
||||||
CircuitBreakerEnabled serpent.Bool `json:"circuit_breaker_enabled" typescript:",notnull"`
|
CircuitBreakerEnabled serpent.Bool `json:"circuit_breaker_enabled" typescript:",notnull"`
|
||||||
CircuitBreakerFailureThreshold serpent.Int64 `json:"circuit_breaker_failure_threshold" typescript:",notnull"`
|
CircuitBreakerFailureThreshold serpent.Int64 `json:"circuit_breaker_failure_threshold" typescript:",notnull"`
|
||||||
CircuitBreakerInterval serpent.Duration `json:"circuit_breaker_interval" typescript:",notnull"`
|
CircuitBreakerInterval serpent.Duration `json:"circuit_breaker_interval" typescript:",notnull"`
|
||||||
|
|||||||
Generated
+1
-1
@@ -487,7 +487,7 @@
|
|||||||
| `allow_byok` | boolean | false | | |
|
| `allow_byok` | boolean | false | | |
|
||||||
| `anthropic` | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false | | Deprecated: Use Providers with indexed CODER_AIBRIDGE_PROVIDER_<N>_* env vars instead. |
|
| `anthropic` | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false | | Deprecated: Use Providers with indexed CODER_AIBRIDGE_PROVIDER_<N>_* env vars instead. |
|
||||||
| `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | Deprecated: Use Providers with indexed CODER_AIBRIDGE_PROVIDER_<N>_* env vars instead. |
|
| `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | Deprecated: Use Providers with indexed CODER_AIBRIDGE_PROVIDER_<N>_* env vars instead. |
|
||||||
| `circuit_breaker_enabled` | boolean | false | | Circuit breaker protects against cascading failures from upstream AI provider rate limits (429, 503, 529 overloaded). |
|
| `circuit_breaker_enabled` | boolean | false | | Circuit breaker protects against cascading failures from upstream AI provider overload (503, 529). |
|
||||||
| `circuit_breaker_failure_threshold` | integer | false | | |
|
| `circuit_breaker_failure_threshold` | integer | false | | |
|
||||||
| `circuit_breaker_interval` | integer | false | | |
|
| `circuit_breaker_interval` | integer | false | | |
|
||||||
| `circuit_breaker_max_requests` | integer | false | | |
|
| `circuit_breaker_max_requests` | integer | false | | |
|
||||||
|
|||||||
Generated
+1
-1
@@ -1899,7 +1899,7 @@ Allow users to provide their own LLM API keys or subscriptions. When disabled, o
|
|||||||
| YAML | <code>aibridge.circuit_breaker_enabled</code> |
|
| YAML | <code>aibridge.circuit_breaker_enabled</code> |
|
||||||
| Default | <code>false</code> |
|
| Default | <code>false</code> |
|
||||||
|
|
||||||
Enable the circuit breaker to protect against cascading failures from upstream AI provider rate limits (429, 503, 529 overloaded).
|
Enable the circuit breaker to protect against cascading failures from upstream AI provider overload (503, 529).
|
||||||
|
|
||||||
### --aibridge-proxy-enabled
|
### --aibridge-proxy-enabled
|
||||||
|
|
||||||
|
|||||||
@@ -438,13 +438,13 @@ func TestIntegrationCircuitBreaker(t *testing.T) {
|
|||||||
registry := prometheus.NewRegistry()
|
registry := prometheus.NewRegistry()
|
||||||
metrics := aibridge.NewMetrics(registry)
|
metrics := aibridge.NewMetrics(registry)
|
||||||
|
|
||||||
// Set up mock OpenAI server that always returns 429 Too Many Requests.
|
// Set up mock OpenAI server that always returns 503 Service Unavailable.
|
||||||
mockOpenAI := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
mockOpenAI := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
// Disable SDK retries.
|
// Disable SDK retries.
|
||||||
w.Header().Set("x-should-retry", "false")
|
w.Header().Set("x-should-retry", "false")
|
||||||
w.WriteHeader(http.StatusTooManyRequests)
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
_, _ = w.Write([]byte(`{"error":{"type":"rate_limit_error","message":"rate limited","code":"rate_limit_exceeded"}}`))
|
_, _ = w.Write([]byte(`{"error":{"message":"Service Unavailable.","type":"cf_service_unavailable","code":503}}`))
|
||||||
}))
|
}))
|
||||||
t.Cleanup(mockOpenAI.Close)
|
t.Cleanup(mockOpenAI.Close)
|
||||||
|
|
||||||
|
|||||||
+1
-1
@@ -139,7 +139,7 @@ AI BRIDGE OPTIONS:
|
|||||||
|
|
||||||
--aibridge-circuit-breaker-enabled bool, $CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED (default: false)
|
--aibridge-circuit-breaker-enabled bool, $CODER_AIBRIDGE_CIRCUIT_BREAKER_ENABLED (default: false)
|
||||||
Enable the circuit breaker to protect against cascading failures from
|
Enable the circuit breaker to protect against cascading failures from
|
||||||
upstream AI provider rate limits (429, 503, 529 overloaded).
|
upstream AI provider overload (503, 529).
|
||||||
|
|
||||||
--aibridge-retention duration, $CODER_AIBRIDGE_RETENTION (default: 60d)
|
--aibridge-retention duration, $CODER_AIBRIDGE_RETENTION (default: 60d)
|
||||||
Length of time to retain data such as interceptions and all related
|
Length of time to retain data such as interceptions and all related
|
||||||
|
|||||||
Generated
+1
-1
@@ -70,7 +70,7 @@ export interface AIBridgeConfig {
|
|||||||
readonly allow_byok: boolean;
|
readonly allow_byok: boolean;
|
||||||
/**
|
/**
|
||||||
* Circuit breaker protects against cascading failures from upstream AI
|
* Circuit breaker protects against cascading failures from upstream AI
|
||||||
* provider rate limits (429, 503, 529 overloaded).
|
* provider overload (503, 529).
|
||||||
*/
|
*/
|
||||||
readonly circuit_breaker_enabled: boolean;
|
readonly circuit_breaker_enabled: boolean;
|
||||||
readonly circuit_breaker_failure_threshold: number;
|
readonly circuit_breaker_failure_threshold: number;
|
||||||
|
|||||||
Reference in New Issue
Block a user