feat: add automatic key failover for AI Bridge passthrough (#24920)

## Description

Adds automatic key failover for passthrough routes for the Anthropic and OpenAI providers. A new `keyFailoverTransport` wraps the reverse-proxy transport: centralized requests walk the configured key pool and retry with the next key on key-specific failures (401/403/429), reusing the same key-marking semantics as the bridged routes.

BYOK (bring-your-own-key) passthrough requests run as a single attempt with no failover.

## Changes

- New `keypool.KeyFailoverConfig` carrying the `Pool` to walk and the provider-specific closures (`IsBYOK`, `InjectAuthKey`, `MarkKey`, `BuildExhaustedResponse`).
- New `keypool.NewKeyFailoverTransport`: wraps an inner `http.RoundTripper`. Returns `inner` unchanged when `Pool` is nil; otherwise, it produces a transport that buffers the request body once, walks the pool per request, and replays each attempt with the next key.
- New `Provider.KeyFailoverConfig(logger)` interface method. Anthropic injects `X-Api-Key`; OpenAI injects `Authorization: Bearer ...`; Copilot returns an empty config.
- `passthrough.go` wires `NewKeyFailoverTransport` around the existing apidump middleware, so every retry attempt is recorded.

## Related Issues

Related to: https://github.com/coder/internal/issues/1446
Related to: https://linear.app/codercom/issue/AIGOV-197/aibridge-automatic-key-failover-for-bridged-and-passthrough-routes

## Follow-up PRs

- Remove dead `Provider.InjectAuthHeader` method now that all auth is applied per-attempt by `KeyFailoverTransport`.
- Bedrock multi-key support.
- Refactor provider vs interceptor config separation.
- Record the actually-used key in the interception credential hint after failover.

> [!NOTE]
> Initially generated by Claude Opus 4.7, modified and reviewed by @ssncferreira
This commit is contained in:
Susana Ferreira
2026-05-07 15:46:36 +01:00
committed by GitHub
parent b94a0aebcd
commit 0766cc3097
23 changed files with 758 additions and 77 deletions
+11 -9
View File
@@ -13,6 +13,7 @@ import (
"cdr.dev/slog/v3"
"github.com/coder/coder/v2/aibridge/intercept/apidump"
"github.com/coder/coder/v2/aibridge/keypool"
"github.com/coder/coder/v2/aibridge/metrics"
"github.com/coder/coder/v2/aibridge/provider"
"github.com/coder/coder/v2/aibridge/tracing"
@@ -41,13 +42,17 @@ func newPassthroughRouter(prov provider.Provider, logger slog.Logger, m *metrics
ExpectContinueTimeout: 1 * time.Second,
}
// Build a reverse proxy to the upstream, reused across all requests for this provider.
// All request modifications happen in Rewrite.
// Build the passthrough proxy, reused across all requests for this provider.
// Rewrite sets proxy headers. For centralized requests, KeyFailoverTransport
// handles auth and failover. BYOK requests pass through.
proxy := &httputil.ReverseProxy{
Rewrite: func(pr *httputil.ProxyRequest) {
rewritePassthroughRequest(pr, provBaseURL, prov)
rewritePassthroughRequest(pr, provBaseURL)
},
Transport: apidump.NewPassthroughMiddleware(t, prov.APIDumpDir(), prov.Name(), logger, quartz.NewReal()),
Transport: keypool.NewKeyFailoverTransport(
apidump.NewPassthroughMiddleware(t, prov.APIDumpDir(), prov.Name(), logger, quartz.NewReal()),
prov.KeyFailoverConfig(logger),
),
ErrorHandler: func(rw http.ResponseWriter, req *http.Request, e error) {
logger.Warn(req.Context(), "reverse proxy error", slog.Error(e), slog.F("path", req.URL.Path))
http.Error(rw, "upstream proxy error", http.StatusBadGateway)
@@ -67,8 +72,8 @@ func newPassthroughRouter(prov provider.Provider, logger slog.Logger, m *metrics
}
// rewritePassthroughRequest configures the outbound request for the upstream and
// applies proxy headers and provider auth.
func rewritePassthroughRequest(pr *httputil.ProxyRequest, provBaseURL *url.URL, prov provider.Provider) {
// applies proxy headers.
func rewritePassthroughRequest(pr *httputil.ProxyRequest, provBaseURL *url.URL) {
pr.SetURL(provBaseURL)
// Rewrite sets "X-Forwarded-For" to just the last hop (the client's IP address).
@@ -87,9 +92,6 @@ func rewritePassthroughRequest(pr *httputil.ProxyRequest, provBaseURL *url.URL,
if _, ok := pr.Out.Header["User-Agent"]; !ok {
pr.Out.Header.Set("User-Agent", "aibridge") // TODO: use build tag.
}
// Inject provider auth.
prov.InjectAuthHeader(&pr.Out.Header)
}
// newInvalidBaseURLHandler returns a handler that always returns 502