refactor: load AI providers from the database at startup (#25672)

Replace the env-based `BuildProviders` with a DB-backed loader. The database is now the single source of truth for runtime provider configuration; env config arrives via `SeedAIProvidersFromEnv` (run at boot) and `BuildProviders` reads it back as `aibridge.Provider` instances. `cli/server.go` and `enterprise/cli/server.go` both call the same path, so aibridged and aibridgeproxyd see the same provider set.

Per-provider `DumpDir` is replaced by a top-level `CODER_AI_GATEWAY_DUMP_DIR` base; each provider's effective dump path is `<base>/<provider name>`.
This commit is contained in:
Danny Kopping
2026-05-26 15:57:01 +02:00
committed by GitHub
parent dfd7ca3b98
commit 282ab7de34
19 changed files with 570 additions and 258 deletions
+197 -174
View File
@@ -5,15 +5,21 @@ package cli
import ( import (
"context" "context"
"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"golang.org/x/xerrors" "golang.org/x/xerrors"
"cdr.dev/slog/v3"
"github.com/coder/coder/v2/aibridge" "github.com/coder/coder/v2/aibridge"
"github.com/coder/coder/v2/aibridge/config" "github.com/coder/coder/v2/aibridge/config"
"github.com/coder/coder/v2/aibridge/keypool" "github.com/coder/coder/v2/aibridge/keypool"
"github.com/coder/coder/v2/coderd" "github.com/coder/coder/v2/coderd"
"github.com/coder/coder/v2/coderd/aibridged" "github.com/coder/coder/v2/coderd/aibridged"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/db2sdk"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/tracing" "github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk"
"github.com/coder/quartz" "github.com/coder/quartz"
) )
@@ -44,183 +50,200 @@ func newAIBridgeDaemon(coderAPI *coderd.API, providers []aibridge.Provider) (*ai
return srv, nil return srv, nil
} }
// BuildProviders constructs the list of AI providers from config. // BuildProviders loads every enabled ai_providers row, attaches its
// It merges legacy single-provider env vars and indexed provider configs: // keys, and constructs the equivalent [aibridge.Provider] instances.
// 1. Legacy providers (from CODER_AI_GATEWAY_OPENAI_KEY, etc.) are added first. // The database is the single source of truth for runtime provider
// If a legacy name conflicts with an indexed provider, startup fails with // configuration.
// a clear error asking the admin to remove one or the other. //
// 2. Indexed providers (from CODER_AI_GATEWAY_PROVIDER_<N>_*) are added next. // Per-provider construction errors are logged and the offending row is
func BuildProviders(cfg codersdk.AIBridgeConfig) ([]aibridge.Provider, error) { // excluded from the returned snapshot; only a failure of the DB query
var cbConfig *config.CircuitBreaker // itself is propagated. This keeps a single misconfigured row from
if cfg.CircuitBreakerEnabled.Value() { // taking the whole daemon down.
cbConfig = &config.CircuitBreaker{ func BuildProviders(ctx context.Context, db database.Store, cfg codersdk.AIBridgeConfig, logger slog.Logger) ([]aibridge.Provider, error) {
//nolint:gocritic // AsAIBridged has a minimal permission set for this purpose.
authCtx := dbauthz.AsAIBridged(ctx)
// rows and keysByProvider are filled in by the transaction closure below
// and consumed after it returns.
var rows []database.AIProvider
keysByProvider := make(map[uuid.UUID][]database.AIProviderKey)
// Wrap both queries in a read-only transaction so the provider list
// and the key list are consistent with each other.
// NOTE(review): no Isolation level is set in the TxOptions below. If the
// default maps to PostgreSQL READ COMMITTED, each statement takes its own
// snapshot and the two reads are not guaranteed to be mutually consistent;
// confirm whether a repeatable-read isolation level is intended here.
err := db.InTx(func(tx database.Store) error {
var err error
rows, err = tx.GetAIProviders(authCtx, database.GetAIProvidersParams{
IncludeDisabled: false,
})
if err != nil {
return xerrors.Errorf("load ai providers: %w", err)
}
// Nothing enabled: skip the key query entirely.
if len(rows) == 0 {
return nil
}
// Load keys only for the enabled providers to avoid materializing
// secrets for disabled rows.
ids := make([]uuid.UUID, len(rows))
for i, r := range rows {
ids[i] = r.ID
}
keyRows, err := tx.GetAIProviderKeysByProviderIDs(authCtx, ids)
if err != nil {
return xerrors.Errorf("load ai provider keys: %w", err)
}
// Group the flat key list by owning provider so each row can be
// built with just its own keys.
for _, k := range keyRows {
keysByProvider[k.ProviderID] = append(keysByProvider[k.ProviderID], k)
}
return nil
}, &database.TxOptions{ReadOnly: true, TxIdentifier: "build_ai_providers"})
if err != nil {
return nil, err
}
// Build each provider independently; a failure drops only that row.
out := make([]aibridge.Provider, 0, len(rows))
for _, row := range rows {
prov, err := buildAIProviderFromRow(row, keysByProvider[row.ID], cfg)
if err != nil {
logger.Error(ctx, "skipping misconfigured ai provider",
slog.F("provider_id", row.ID),
slog.F("provider_name", row.Name),
slog.F("provider_type", string(row.Type)),
slog.Error(err),
)
continue
}
out = append(out, prov)
}
// Rows existed but none could be built: surface this as a warning, since
// the function still returns successfully with an empty slice.
if len(rows) > 0 && len(out) == 0 {
logger.Warn(ctx, "all enabled ai providers failed to build; daemon will start with zero providers")
}
return out, nil
}
// buildAIProviderFromRow turns a single ai_providers row, together with
// the keys loaded for it, into the matching [aibridge.Provider].
//
// The row's settings blob is decoded first and a decode failure is
// returned as an error. OpenAI and Anthropic rows receive a key pool when
// at least one key is present and are rejected when they have no usable
// credentials while BYOK is disabled. Copilot rows never receive a key
// pool. Any other row type yields an "unsupported provider type" error.
func buildAIProviderFromRow(
	row database.AIProvider,
	keys []database.AIProviderKey,
	cfg codersdk.AIBridgeConfig,
) (aibridge.Provider, error) {
	settings, err := db2sdk.AIProviderSettings(row.Settings)
	if err != nil {
		return nil, xerrors.Errorf("decode settings: %w", err)
	}

	// Deployment-wide values shared by every provider type.
	breaker := circuitBreakerConfig(cfg)
	actorHeaders := cfg.SendActorHeaders.Value()
	// NOTE(review): the base dump directory is forwarded unchanged to every
	// provider; presumably the per-provider subdirectory is derived
	// downstream. Confirm against the provider implementation.
	apiDumpDir := cfg.APIDumpDir.Value()
	hasKeys := len(keys) > 0

	switch row.Type {
	case database.AiProviderTypeOpenai:
		if !hasKeys && !cfg.AllowBYOK.Value() {
			return nil, xerrors.New("openai provider has no api keys configured and BYOK is not enabled")
		}
		// The pool stays nil when the provider relies purely on BYOK.
		var keyPool *keypool.Pool
		if hasKeys {
			keyPool, err = buildAIProviderKeyPool(keys)
			if err != nil {
				return nil, xerrors.Errorf("openai key pool: %w", err)
			}
		}
		openaiCfg := aibridge.OpenAIConfig{
			Name:             row.Name,
			BaseURL:          row.BaseUrl,
			KeyPool:          keyPool,
			APIDumpDir:       apiDumpDir,
			CircuitBreaker:   breaker,
			SendActorHeaders: actorHeaders,
		}
		return aibridge.NewOpenAIProvider(openaiCfg), nil

	case database.AiProviderTypeAnthropic:
		awsBedrock := bedrockConfigFromRow(row, settings)
		// Bedrock-backed Anthropic authenticates via AWS credentials in
		// the settings blob, not the api_keys table. A bearer-token
		// Anthropic without any key cannot make upstream calls.
		if awsBedrock == nil && !hasKeys && !cfg.AllowBYOK.Value() {
			return nil, xerrors.New("anthropic provider has no api keys, no bedrock credentials, and BYOK is not enabled")
		}
		var keyPool *keypool.Pool
		if hasKeys {
			keyPool, err = buildAIProviderKeyPool(keys)
			if err != nil {
				return nil, xerrors.Errorf("anthropic key pool: %w", err)
			}
		}
		anthropicCfg := aibridge.AnthropicConfig{
			Name:             row.Name,
			BaseURL:          row.BaseUrl,
			KeyPool:          keyPool,
			APIDumpDir:       apiDumpDir,
			CircuitBreaker:   breaker,
			SendActorHeaders: actorHeaders,
		}
		return aibridge.NewAnthropicProvider(anthropicCfg, awsBedrock), nil

	case database.AiProviderTypeCopilot:
		// Copilot is always BYOK; the per-user token is supplied on each
		// request via the Authorization header, so no keypool is built.
		copilotCfg := aibridge.CopilotConfig{
			Name:           row.Name,
			BaseURL:        row.BaseUrl,
			APIDumpDir:     apiDumpDir,
			CircuitBreaker: breaker,
		}
		return aibridge.NewCopilotProvider(copilotCfg), nil

	default:
		return nil, xerrors.Errorf("unsupported provider type: %q", row.Type)
	}
}
// buildAIProviderKeyPool collects the APIKey of every given row and hands
// them to keypool.New with a real clock. Callers must ensure
// len(keys) > 0 beforehand; keypool.New rejects empty input.
func buildAIProviderKeyPool(keys []database.AIProviderKey) (*keypool.Pool, error) {
	secrets := make([]string, len(keys))
	for i := range keys {
		secrets[i] = keys[i].APIKey
	}
	return keypool.New(secrets, quartz.NewReal())
}
// bedrockConfigFromRow derives the AWS Bedrock configuration for an
// Anthropic provider row. It returns nil when the settings carry no
// Bedrock section, or when that section reports itself as not configured
// via [codersdk.AIProviderBedrockSettings.IsConfigured].
//
// The provider row's BaseUrl is the generic upstream endpoint and is
// always non-empty, so it cannot serve as a Bedrock detection signal;
// only the settings blob decides. When Bedrock is configured, the row's
// BaseUrl is copied into the result and nil access-key pointers become
// empty strings.
func bedrockConfigFromRow(row database.AIProvider, settings codersdk.AIProviderSettings) *aibridge.AWSBedrockConfig {
	src := settings.Bedrock
	if src == nil {
		return nil
	}

	// Work on a copy of the settings value, as the check below is made
	// against the dereferenced struct.
	bedrock := *src
	if !bedrock.IsConfigured() {
		return nil
	}

	return &aibridge.AWSBedrockConfig{
		BaseURL:         row.BaseUrl,
		Region:          bedrock.Region,
		AccessKey:       ptr.NilToEmpty(bedrock.AccessKey),
		AccessKeySecret: ptr.NilToEmpty(bedrock.AccessKeySecret),
		Model:           bedrock.Model,
		SmallFastModel:  bedrock.SmallFastModel,
	}
}
// circuitBreakerConfig translates the circuit-breaker deployment options in
// cfg into a *config.CircuitBreaker (failure threshold, interval, timeout
// and max requests). It returns nil when the breaker is disabled.
func circuitBreakerConfig(cfg codersdk.AIBridgeConfig) *config.CircuitBreaker {
if !cfg.CircuitBreakerEnabled.Value() {
return nil
}
return &config.CircuitBreaker{
FailureThreshold: uint32(cfg.CircuitBreakerFailureThreshold.Value()), //nolint:gosec // Validated by serpent.Validate in deployment options. FailureThreshold: uint32(cfg.CircuitBreakerFailureThreshold.Value()), //nolint:gosec // Validated by serpent.Validate in deployment options.
Interval: cfg.CircuitBreakerInterval.Value(), Interval: cfg.CircuitBreakerInterval.Value(),
Timeout: cfg.CircuitBreakerTimeout.Value(), Timeout: cfg.CircuitBreakerTimeout.Value(),
MaxRequests: uint32(cfg.CircuitBreakerMaxRequests.Value()), //nolint:gosec // Validated by serpent.Validate in deployment options. MaxRequests: uint32(cfg.CircuitBreakerMaxRequests.Value()), //nolint:gosec // Validated by serpent.Validate in deployment options.
} }
} }
var providers []aibridge.Provider
usedNames := make(map[string]struct{})
// Collect names from indexed providers so we can detect conflicts
// with legacy providers.
for _, p := range cfg.Providers {
name := p.Name
if name == "" {
name = p.Type
}
usedNames[name] = struct{}{}
}
// Add legacy OpenAI provider if configured.
if cfg.LegacyOpenAI.Key.String() != "" {
if _, conflict := usedNames[aibridge.ProviderOpenAI]; conflict {
return nil, xerrors.Errorf("legacy CODER_AI_GATEWAY_OPENAI_KEY (or CODER_AIBRIDGE_OPENAI_KEY) conflicts with indexed provider named %q; remove one or the other", aibridge.ProviderOpenAI)
}
providers = append(providers, aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{
Name: aibridge.ProviderOpenAI,
BaseURL: cfg.LegacyOpenAI.BaseURL.String(),
Key: cfg.LegacyOpenAI.Key.String(),
CircuitBreaker: cbConfig,
SendActorHeaders: cfg.SendActorHeaders.Value(),
}))
usedNames[aibridge.ProviderOpenAI] = struct{}{}
}
// Add legacy Anthropic provider if configured. Bedrock credentials
// alone are sufficient, an Anthropic API key is not required when
// using AWS Bedrock.
if cfg.LegacyAnthropic.Key.String() != "" || getBedrockConfig(cfg.LegacyBedrock) != nil {
if _, conflict := usedNames[aibridge.ProviderAnthropic]; conflict {
return nil, xerrors.Errorf("legacy CODER_AI_GATEWAY_ANTHROPIC_KEY (or CODER_AIBRIDGE_ANTHROPIC_KEY) conflicts with indexed provider named %q; remove one or the other", aibridge.ProviderAnthropic)
}
var pool *keypool.Pool
if key := cfg.LegacyAnthropic.Key.String(); key != "" {
var err error
pool, err = keypool.New([]string{key}, quartz.NewReal())
if err != nil {
return nil, xerrors.Errorf("create legacy anthropic key pool: %w", err)
}
}
providers = append(providers, aibridge.NewAnthropicProvider(aibridge.AnthropicConfig{
Name: aibridge.ProviderAnthropic,
BaseURL: cfg.LegacyAnthropic.BaseURL.String(),
KeyPool: pool,
CircuitBreaker: cbConfig,
SendActorHeaders: cfg.SendActorHeaders.Value(),
}, getBedrockConfig(cfg.LegacyBedrock)))
usedNames[aibridge.ProviderAnthropic] = struct{}{}
}
// Add indexed providers.
for _, p := range cfg.Providers {
name := p.Name
if name == "" {
name = p.Type
}
switch p.Type {
case aibridge.ProviderOpenAI:
var pool *keypool.Pool
if len(p.Keys) > 0 {
var err error
pool, err = keypool.New(p.Keys, quartz.NewReal())
if err != nil {
return nil, xerrors.Errorf("create openai key pool for provider %q: %w", name, err)
}
}
providers = append(providers, aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{
Name: name,
BaseURL: p.BaseURL,
KeyPool: pool,
APIDumpDir: p.DumpDir,
CircuitBreaker: cbConfig,
SendActorHeaders: cfg.SendActorHeaders.Value(),
}))
case aibridge.ProviderAnthropic:
var pool *keypool.Pool
if len(p.Keys) > 0 {
var err error
pool, err = keypool.New(p.Keys, quartz.NewReal())
if err != nil {
return nil, xerrors.Errorf("create anthropic key pool for provider %q: %w", name, err)
}
}
providers = append(providers, aibridge.NewAnthropicProvider(aibridge.AnthropicConfig{
Name: name,
BaseURL: p.BaseURL,
KeyPool: pool,
APIDumpDir: p.DumpDir,
CircuitBreaker: cbConfig,
SendActorHeaders: cfg.SendActorHeaders.Value(),
}, bedrockConfigFromProvider(p)))
case aibridge.ProviderCopilot:
providers = append(providers, aibridge.NewCopilotProvider(aibridge.CopilotConfig{
Name: name,
BaseURL: p.BaseURL,
APIDumpDir: p.DumpDir,
CircuitBreaker: cbConfig,
}))
default:
return nil, xerrors.Errorf("unknown provider type %q for provider %q", p.Type, name)
}
}
return providers, nil
}
// bedrockConfigFromProvider maps the Bedrock fields of an indexed
// AIProviderConfig onto an aibridge AWSBedrockConfig. It returns nil when
// codersdk.IsBedrockConfigured reports that Bedrock is not configured for
// the given base URL and settings.
func bedrockConfigFromProvider(p codersdk.AIProviderConfig) *aibridge.AWSBedrockConfig {
	// Currently, only the first key pair is used, if any.
	// TODO(ssncferreira): pass a keypool.Pool instead.
	firstOrEmpty := func(vals []string) string {
		if len(vals) == 0 {
			return ""
		}
		return vals[0]
	}
	keyID := firstOrEmpty(p.BedrockAccessKeys)
	keySecret := firstOrEmpty(p.BedrockAccessKeySecrets)

	bedrockSettings := codersdk.NewAIProviderBedrockSettings(
		p.BedrockRegion, keyID, keySecret,
		p.BedrockModel, p.BedrockSmallFastModel,
	)
	if codersdk.IsBedrockConfigured(p.BedrockBaseURL, bedrockSettings) {
		return &aibridge.AWSBedrockConfig{
			BaseURL:         p.BedrockBaseURL,
			Region:          p.BedrockRegion,
			AccessKey:       keyID,
			AccessKeySecret: keySecret,
			Model:           p.BedrockModel,
			SmallFastModel:  p.BedrockSmallFastModel,
		}
	}
	return nil
}
// getBedrockConfig converts the legacy Bedrock deployment options into an
// aibridge AWSBedrockConfig, or returns nil when
// codersdk.IsBedrockConfigured reports that Bedrock is not configured.
func getBedrockConfig(cfg codersdk.AIBridgeBedrockConfig) *aibridge.AWSBedrockConfig {
	// Read each option once; the same values feed both the configured
	// check and the resulting config.
	var (
		baseURL   = cfg.BaseURL.String()
		region    = cfg.Region.String()
		keyID     = cfg.AccessKey.String()
		keySecret = cfg.AccessKeySecret.String()
		model     = cfg.Model.String()
		fastModel = cfg.SmallFastModel.String()
	)

	// codersdk.IsBedrockConfigured decides what counts as Bedrock; when
	// it returns false, the AWS SDK default credential chain (env vars,
	// shared config, IAM roles, etc.) is left to resolve credentials.
	bedrockSettings := codersdk.NewAIProviderBedrockSettings(region, keyID, keySecret, model, fastModel)
	if codersdk.IsBedrockConfigured(baseURL, bedrockSettings) {
		return &aibridge.AWSBedrockConfig{
			BaseURL:         baseURL,
			Region:          region,
			AccessKey:       keyID,
			AccessKeySecret: keySecret,
			Model:           model,
			SmallFastModel:  fastModel,
		}
	}
	return nil
}
+201 -29
View File
@@ -3,23 +3,47 @@
package cli package cli
import ( import (
"database/sql"
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"cdr.dev/slog/v3/sloggers/slogtest"
"github.com/coder/coder/v2/aibridge" "github.com/coder/coder/v2/aibridge"
"github.com/coder/coder/v2/coderd"
agplaibridge "github.com/coder/coder/v2/coderd/aibridge" agplaibridge "github.com/coder/coder/v2/coderd/aibridge"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbgen"
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/testutil"
"github.com/coder/serpent" "github.com/coder/serpent"
) )
// buildFromEnv runs the env-config-in/providers-out path against a fresh
// test database: coderd.SeedAIProvidersFromEnv writes the env-derived rows,
// then BuildProviders reads them back as runtime [aibridge.Provider]
// instances. A seeding error is returned as-is without calling
// BuildProviders. This lets the TestBuildProviders table keep its
// config-based inputs while the database remains the single source of
// truth.
func buildFromEnv(t *testing.T, cfg codersdk.AIBridgeConfig) ([]aibridge.Provider, error) {
	t.Helper()

	store, _ := dbtestutil.NewDB(t)
	ctx := testutil.Context(t, testutil.WaitShort)
	log := slogtest.Make(t, nil)

	seedErr := coderd.SeedAIProvidersFromEnv(ctx, store, cfg, log)
	if seedErr != nil {
		return nil, seedErr
	}
	return BuildProviders(ctx, store, cfg, log)
}
func TestBuildProviders(t *testing.T) { func TestBuildProviders(t *testing.T) {
t.Parallel() t.Parallel()
t.Run("EmptyConfig", func(t *testing.T) { t.Run("EmptyConfig", func(t *testing.T) {
t.Parallel() t.Parallel()
providers, err := BuildProviders(codersdk.AIBridgeConfig{}) providers, err := buildFromEnv(t, codersdk.AIBridgeConfig{})
require.NoError(t, err) require.NoError(t, err)
assert.Empty(t, providers) assert.Empty(t, providers)
}) })
@@ -30,7 +54,7 @@ func TestBuildProviders(t *testing.T) {
cfg.LegacyOpenAI.Key = serpent.String("sk-openai") cfg.LegacyOpenAI.Key = serpent.String("sk-openai")
cfg.LegacyAnthropic.Key = serpent.String("sk-anthropic") cfg.LegacyAnthropic.Key = serpent.String("sk-anthropic")
providers, err := BuildProviders(cfg) providers, err := buildFromEnv(t, cfg)
require.NoError(t, err) require.NoError(t, err)
names := providerNames(providers) names := providerNames(providers)
@@ -47,25 +71,26 @@ func TestBuildProviders(t *testing.T) {
Type: aibridge.ProviderAnthropic, Type: aibridge.ProviderAnthropic,
Name: "anthropic-zdr", Name: "anthropic-zdr",
Keys: []string{"sk-zdr"}, Keys: []string{"sk-zdr"},
DumpDir: "/tmp/anthropic-dump",
}, },
{ {
Type: aibridge.ProviderOpenAI, Type: aibridge.ProviderOpenAI,
Name: "openai-azure", Name: "openai-azure",
Keys: []string{"sk-azure"}, Keys: []string{"sk-azure"},
BaseURL: "https://azure.openai.com", BaseURL: "https://azure.openai.com",
DumpDir: "/tmp/openai-dump",
}, },
}, },
} }
providers, err := BuildProviders(cfg) providers, err := buildFromEnv(t, cfg)
require.NoError(t, err) require.NoError(t, err)
require.Len(t, providers, 2)
names := providerNames(providers) byName := make(map[string]aibridge.Provider, len(providers))
assert.Equal(t, []string{"anthropic-zdr", "openai-azure"}, names) for _, p := range providers {
assert.Equal(t, "/tmp/anthropic-dump", providers[0].APIDumpDir()) byName[p.Name()] = p
assert.Equal(t, "/tmp/openai-dump", providers[1].APIDumpDir()) }
require.Contains(t, byName, "anthropic-zdr")
require.Contains(t, byName, "openai-azure")
}) })
t.Run("LegacyOpenAIConflictsWithIndexed", func(t *testing.T) { t.Run("LegacyOpenAIConflictsWithIndexed", func(t *testing.T) {
@@ -77,9 +102,9 @@ func TestBuildProviders(t *testing.T) {
} }
cfg.LegacyOpenAI.Key = serpent.String("sk-legacy") cfg.LegacyOpenAI.Key = serpent.String("sk-legacy")
_, err := BuildProviders(cfg) _, err := buildFromEnv(t, cfg)
require.Error(t, err) require.Error(t, err)
assert.Contains(t, err.Error(), "conflicts with indexed provider") assert.Contains(t, err.Error(), "conflicts with the legacy env var")
}) })
t.Run("LegacyAnthropicConflictsWithIndexed", func(t *testing.T) { t.Run("LegacyAnthropicConflictsWithIndexed", func(t *testing.T) {
@@ -91,9 +116,9 @@ func TestBuildProviders(t *testing.T) {
} }
cfg.LegacyAnthropic.Key = serpent.String("sk-legacy") cfg.LegacyAnthropic.Key = serpent.String("sk-legacy")
_, err := BuildProviders(cfg) _, err := buildFromEnv(t, cfg)
require.Error(t, err) require.Error(t, err)
assert.Contains(t, err.Error(), "conflicts with indexed provider") assert.Contains(t, err.Error(), "conflicts with the legacy env var")
}) })
t.Run("MixedLegacyAndIndexed", func(t *testing.T) { t.Run("MixedLegacyAndIndexed", func(t *testing.T) {
@@ -106,7 +131,7 @@ func TestBuildProviders(t *testing.T) {
cfg.LegacyOpenAI.Key = serpent.String("sk-openai") cfg.LegacyOpenAI.Key = serpent.String("sk-openai")
cfg.LegacyAnthropic.Key = serpent.String("sk-anthropic") cfg.LegacyAnthropic.Key = serpent.String("sk-anthropic")
providers, err := BuildProviders(cfg) providers, err := buildFromEnv(t, cfg)
require.NoError(t, err) require.NoError(t, err)
names := providerNames(providers) names := providerNames(providers)
@@ -123,7 +148,7 @@ func TestBuildProviders(t *testing.T) {
cfg.LegacyBedrock.AccessKey = serpent.String("AKID") cfg.LegacyBedrock.AccessKey = serpent.String("AKID")
cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret") cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret")
providers, err := BuildProviders(cfg) providers, err := buildFromEnv(t, cfg)
require.NoError(t, err) require.NoError(t, err)
names := providerNames(providers) names := providerNames(providers)
@@ -139,7 +164,7 @@ func TestBuildProviders(t *testing.T) {
cfg.LegacyBedrock.AccessKey = serpent.String("AKID") cfg.LegacyBedrock.AccessKey = serpent.String("AKID")
cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret") cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret")
providers, err := BuildProviders(cfg) providers, err := buildFromEnv(t, cfg)
require.NoError(t, err) require.NoError(t, err)
require.Len(t, providers, 1) require.Len(t, providers, 1)
@@ -150,15 +175,18 @@ func TestBuildProviders(t *testing.T) {
t.Run("UnknownType", func(t *testing.T) { t.Run("UnknownType", func(t *testing.T) {
t.Parallel() t.Parallel()
// Unknown provider types are dropped by the seed step (logged
// and skipped) so one misconfigured row cannot stop the daemon
// from starting. The end state is "no providers", not an error.
cfg := codersdk.AIBridgeConfig{ cfg := codersdk.AIBridgeConfig{
Providers: []codersdk.AIProviderConfig{ Providers: []codersdk.AIProviderConfig{
{Type: "gemini", Name: "gemini-pro"}, {Type: "gemini", Name: "gemini-pro"},
}, },
} }
_, err := BuildProviders(cfg) providers, err := buildFromEnv(t, cfg)
require.Error(t, err) require.NoError(t, err)
assert.Contains(t, err.Error(), "unknown provider type") assert.Empty(t, providers)
}) })
t.Run("CopilotVariants", func(t *testing.T) { t.Run("CopilotVariants", func(t *testing.T) {
@@ -167,22 +195,25 @@ func TestBuildProviders(t *testing.T) {
// Copilot API hosts via an explicit BASE_URL. // Copilot API hosts via an explicit BASE_URL.
cfg := codersdk.AIBridgeConfig{ cfg := codersdk.AIBridgeConfig{
Providers: []codersdk.AIProviderConfig{ Providers: []codersdk.AIProviderConfig{
{Type: aibridge.ProviderCopilot, Name: aibridge.ProviderCopilot, DumpDir: "/tmp/copilot-dump"}, {Type: aibridge.ProviderCopilot, Name: aibridge.ProviderCopilot},
{Type: aibridge.ProviderCopilot, Name: agplaibridge.ProviderCopilotBusiness, BaseURL: "https://" + agplaibridge.HostCopilotBusiness}, {Type: aibridge.ProviderCopilot, Name: agplaibridge.ProviderCopilotBusiness, BaseURL: "https://" + agplaibridge.HostCopilotBusiness},
{Type: aibridge.ProviderCopilot, Name: agplaibridge.ProviderCopilotEnterprise, BaseURL: "https://" + agplaibridge.HostCopilotEnterprise}, {Type: aibridge.ProviderCopilot, Name: agplaibridge.ProviderCopilotEnterprise, BaseURL: "https://" + agplaibridge.HostCopilotEnterprise},
}, },
} }
providers, err := BuildProviders(cfg) providers, err := buildFromEnv(t, cfg)
require.NoError(t, err) require.NoError(t, err)
require.Len(t, providers, 3) require.Len(t, providers, 3)
assert.Equal(t, aibridge.ProviderCopilot, providers[0].Name()) byName := make(map[string]aibridge.Provider, len(providers))
assert.Equal(t, "/tmp/copilot-dump", providers[0].APIDumpDir()) for _, p := range providers {
assert.Equal(t, agplaibridge.ProviderCopilotBusiness, providers[1].Name()) byName[p.Name()] = p
assert.Equal(t, "https://"+agplaibridge.HostCopilotBusiness, providers[1].BaseURL()) }
assert.Equal(t, agplaibridge.ProviderCopilotEnterprise, providers[2].Name()) require.Contains(t, byName, aibridge.ProviderCopilot)
assert.Equal(t, "https://"+agplaibridge.HostCopilotEnterprise, providers[2].BaseURL()) require.Contains(t, byName, agplaibridge.ProviderCopilotBusiness)
require.Contains(t, byName, agplaibridge.ProviderCopilotEnterprise)
assert.Equal(t, "https://"+agplaibridge.HostCopilotBusiness, byName[agplaibridge.ProviderCopilotBusiness].BaseURL())
assert.Equal(t, "https://"+agplaibridge.HostCopilotEnterprise, byName[agplaibridge.ProviderCopilotEnterprise].BaseURL())
}) })
t.Run("ChatGPTProvider", func(t *testing.T) { t.Run("ChatGPTProvider", func(t *testing.T) {
@@ -191,17 +222,158 @@ func TestBuildProviders(t *testing.T) {
// base URL. Admins configure it as an indexed openai provider. // base URL. Admins configure it as an indexed openai provider.
cfg := codersdk.AIBridgeConfig{ cfg := codersdk.AIBridgeConfig{
Providers: []codersdk.AIProviderConfig{ Providers: []codersdk.AIProviderConfig{
{Type: aibridge.ProviderOpenAI, Name: agplaibridge.ProviderChatGPT, BaseURL: agplaibridge.BaseURLChatGPT}, {Type: aibridge.ProviderOpenAI, Name: agplaibridge.ProviderChatGPT, Keys: []string{"sk-chatgpt"}, BaseURL: agplaibridge.BaseURLChatGPT},
}, },
} }
providers, err := BuildProviders(cfg) providers, err := buildFromEnv(t, cfg)
require.NoError(t, err) require.NoError(t, err)
require.Len(t, providers, 1) require.Len(t, providers, 1)
assert.Equal(t, agplaibridge.ProviderChatGPT, providers[0].Name()) assert.Equal(t, agplaibridge.ProviderChatGPT, providers[0].Name())
assert.Equal(t, agplaibridge.BaseURLChatGPT, providers[0].BaseURL()) assert.Equal(t, agplaibridge.BaseURLChatGPT, providers[0].BaseURL())
}) })
t.Run("NativeAnthropicDefaultBaseURL", func(t *testing.T) {
t.Parallel()
row := database.AIProvider{
Type: database.AiProviderTypeAnthropic,
Name: aibridge.ProviderAnthropic,
BaseUrl: "https://api.anthropic.com/",
}
assert.Nil(t, bedrockConfigFromRow(row, codersdk.AIProviderSettings{}))
})
t.Run("NativeAnthropicCustomBaseURL", func(t *testing.T) {
t.Parallel()
row := database.AIProvider{
Type: database.AiProviderTypeAnthropic,
Name: "anthropic-proxy",
BaseUrl: "https://internal-proxy.example.com/anthropic/",
}
assert.Nil(t, bedrockConfigFromRow(row, codersdk.AIProviderSettings{}))
})
t.Run("BedrockSettingsPresent", func(t *testing.T) {
t.Parallel()
accessKey := "AKID"
secret := "secret"
model := "anthropic.claude-3-5-sonnet-20241022-v2:0"
smallModel := "anthropic.claude-3-5-haiku-20241022-v1:0"
row := database.AIProvider{
Type: database.AiProviderTypeAnthropic,
Name: "anthropic-bedrock",
BaseUrl: "https://bedrock-runtime.us-west-2.amazonaws.com/",
}
settings := codersdk.AIProviderSettings{
Bedrock: &codersdk.AIProviderBedrockSettings{
Region: "us-west-2",
AccessKey: &accessKey,
AccessKeySecret: &secret,
Model: model,
SmallFastModel: smallModel,
},
}
got := bedrockConfigFromRow(row, settings)
require.NotNil(t, got)
assert.Equal(t, row.BaseUrl, got.BaseURL)
assert.Equal(t, "us-west-2", got.Region)
assert.Equal(t, accessKey, got.AccessKey)
assert.Equal(t, secret, got.AccessKeySecret)
assert.Equal(t, model, got.Model)
assert.Equal(t, smallModel, got.SmallFastModel)
})
t.Run("BedrockSettingsEmpty", func(t *testing.T) {
t.Parallel()
// A non-nil but zero-valued Bedrock settings blob should not
// produce a Bedrock config; the provider's generic BaseUrl is
// not a Bedrock detection signal.
row := database.AIProvider{
Type: database.AiProviderTypeAnthropic,
Name: "anthropic-empty-bedrock",
BaseUrl: "https://api.anthropic.com/",
}
settings := codersdk.AIProviderSettings{
Bedrock: &codersdk.AIProviderBedrockSettings{},
}
assert.Nil(t, bedrockConfigFromRow(row, settings))
})
}
// TestBuildProvidersSkipsBadRows exercises the skip-and-continue path
// directly: rows whose settings blob is malformed or whose type is not
// supported by the runtime builder are logged and excluded from the
// returned snapshot without surfacing a top-level error. The seed path
// filters most of these out before insert, so we bypass it and insert
// rows straight into the database via dbgen.
func TestBuildProvidersSkipsBadRows(t *testing.T) {
t.Parallel()
// A single row whose settings are not valid JSON: expect no error and no
// providers.
t.Run("CorruptSettings", func(t *testing.T) {
t.Parallel()
db, _ := dbtestutil.NewDB(t)
ctx := testutil.Context(t, testutil.WaitShort)
// BuildProviders logs skipped rows at error level; IgnoreErrors
// presumably keeps those logs from failing the test (confirm against
// slogtest).
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
dbgen.AIProvider(t, db, database.AIProvider{
Type: database.AiProviderTypeAnthropic,
Name: "anthropic-broken",
BaseUrl: "https://api.anthropic.com/",
Settings: sql.NullString{String: "not-json", Valid: true},
})
providers, err := BuildProviders(ctx, db, codersdk.AIBridgeConfig{}, logger)
require.NoError(t, err)
assert.Empty(t, providers)
})
// A single row of a type the runtime builder does not handle: expect no
// error and no providers.
t.Run("UnsupportedType", func(t *testing.T) {
t.Parallel()
db, _ := dbtestutil.NewDB(t)
ctx := testutil.Context(t, testutil.WaitShort)
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
// Azure is a valid DB-level provider type but has no runtime
// builder yet; it must hit the default branch and be skipped.
dbgen.AIProvider(t, db, database.AIProvider{
Type: database.AiProviderTypeAzure,
Name: "azure-openai",
BaseUrl: "https://example.openai.azure.com/",
})
providers, err := BuildProviders(ctx, db, codersdk.AIBridgeConfig{}, logger)
require.NoError(t, err)
assert.Empty(t, providers)
})
// One broken row next to one valid OpenAI row with a key: only the valid
// provider must be returned.
t.Run("BadRowDoesNotBlockGoodRow", func(t *testing.T) {
t.Parallel()
db, _ := dbtestutil.NewDB(t)
ctx := testutil.Context(t, testutil.WaitShort)
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
dbgen.AIProvider(t, db, database.AIProvider{
Type: database.AiProviderTypeAnthropic,
Name: "anthropic-broken",
BaseUrl: "https://api.anthropic.com/",
Settings: sql.NullString{String: "{not valid json", Valid: true},
})
good := dbgen.AIProvider(t, db, database.AIProvider{
Type: database.AiProviderTypeOpenai,
Name: "openai-good",
BaseUrl: "https://api.openai.com/",
})
// The OpenAI row needs at least one key: with an empty AIBridgeConfig,
// a keyless OpenAI row is rejected by the builder.
dbgen.AIProviderKey(t, db, database.AIProviderKey{
ProviderID: good.ID,
APIKey: "sk-good",
})
providers, err := BuildProviders(ctx, db, codersdk.AIBridgeConfig{}, logger)
require.NoError(t, err)
require.Len(t, providers, 1)
assert.Equal(t, "openai-good", providers[0].Name())
})
} }
func providerNames(providers []aibridge.Provider) []string { func providerNames(providers []aibridge.Provider) []string {
+27 -19
View File
@@ -899,6 +899,32 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
if err != nil { if err != nil {
return xerrors.Errorf("remove secrets from deployment values: %w", err) return xerrors.Errorf("remove secrets from deployment values: %w", err)
} }
// AI provider DB initialization runs synchronously here so
// authorized reads complete before any background goroutine
// starts. Otherwise a mid-startup cancellation can interrupt
// them and fail startup. Seeding must also happen before
// newAPI so the aibridgeproxyd in the enterprise closure
// observes env-configured providers.
//
// This is a once-off operation; once completed, all providers
// will be sourced from the database.
if err := coderd.SeedAIProvidersFromEnv(
ctx,
options.Database,
vals.AI.BridgeConfig,
logger.Named("aibridge.envseed"),
); err != nil {
return xerrors.Errorf("seed ai providers from env: %w", err)
}
var aibridgeProviders []aibridge.Provider
if vals.AI.BridgeConfig.Enabled.Value() {
aibridgeProviders, err = BuildProviders(ctx, options.Database, vals.AI.BridgeConfig, logger.Named("aibridge.providers"))
if err != nil {
return xerrors.Errorf("build AI providers: %w", err)
}
}
telemetryReporter, err := telemetry.New(telemetry.Options{ telemetryReporter, err := telemetry.New(telemetry.Options{
Disabled: !vals.Telemetry.Enable.Value(), Disabled: !vals.Telemetry.Enable.Value(),
BuiltinPostgres: builtinPostgres, BuiltinPostgres: builtinPostgres,
@@ -1006,18 +1032,6 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
notificationReportGenerator := reports.NewReportGenerator(ctx, logger.Named("notifications.report_generator"), options.Database, options.NotificationsEnqueuer, quartz.NewReal()) notificationReportGenerator := reports.NewReportGenerator(ctx, logger.Named("notifications.report_generator"), options.Database, options.NotificationsEnqueuer, quartz.NewReal())
defer notificationReportGenerator.Close() defer notificationReportGenerator.Close()
// Seed providers before newAPI so the aibridgeproxyd inside
// the enterprise closure observes env-configured providers
// at init.
if err := coderd.SeedAIProvidersFromEnv(
ctx,
options.Database,
vals.AI.BridgeConfig,
logger.Named("aibridge.envseed"),
); err != nil {
return xerrors.Errorf("seed ai providers from env: %w", err)
}
// We use a separate coderAPICloser so the Enterprise API // We use a separate coderAPICloser so the Enterprise API
// can have its own close functions. This is cleaner // can have its own close functions. This is cleaner
// than abstracting the Coder API itself. // than abstracting the Coder API itself.
@@ -1034,11 +1048,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
// unconditionally when the bridge feature is enabled by config so // unconditionally when the bridge feature is enabled by config so
// chatd can use it regardless of license entitlement. // chatd can use it regardless of license entitlement.
if vals.AI.BridgeConfig.Enabled.Value() { if vals.AI.BridgeConfig.Enabled.Value() {
providers, err := BuildProviders(vals.AI.BridgeConfig) aibridgeDaemon, err := newAIBridgeDaemon(coderAPI, aibridgeProviders)
if err != nil {
return xerrors.Errorf("build AI providers: %w", err)
}
aibridgeDaemon, err := newAIBridgeDaemon(coderAPI, providers)
if err != nil { if err != nil {
return xerrors.Errorf("create aibridged: %w", err) return xerrors.Errorf("create aibridged: %w", err)
} }
@@ -3114,8 +3124,6 @@ func readAIProvidersForPrefix(logger slog.Logger, environ []string, prefix strin
} }
case "BASE_URL": case "BASE_URL":
provider.BaseURL = v.Value provider.BaseURL = v.Value
case "DUMP_DIR":
provider.DumpDir = v.Value
case "BEDROCK_BASE_URL": case "BEDROCK_BASE_URL":
provider.BedrockBaseURL = v.Value provider.BedrockBaseURL = v.Value
case "BEDROCK_REGION": case "BEDROCK_REGION":
+51 -2
View File
@@ -10,8 +10,10 @@ import (
"cdr.dev/slog/v3" "cdr.dev/slog/v3"
"cdr.dev/slog/v3/sloggers/slogtest" "cdr.dev/slog/v3/sloggers/slogtest"
"github.com/coder/coder/v2/aibridge" "github.com/coder/coder/v2/aibridge"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/testutil" "github.com/coder/coder/v2/testutil"
"github.com/coder/serpent"
) )
func TestReadAIProvidersFromEnv(t *testing.T) { func TestReadAIProvidersFromEnv(t *testing.T) {
@@ -34,7 +36,6 @@ func TestReadAIProvidersFromEnv(t *testing.T) {
"CODER_AIBRIDGE_PROVIDER_0_NAME=anthropic-zdr", "CODER_AIBRIDGE_PROVIDER_0_NAME=anthropic-zdr",
"CODER_AIBRIDGE_PROVIDER_0_KEY=sk-ant-xxx", "CODER_AIBRIDGE_PROVIDER_0_KEY=sk-ant-xxx",
"CODER_AIBRIDGE_PROVIDER_0_BASE_URL=https://api.anthropic.com/", "CODER_AIBRIDGE_PROVIDER_0_BASE_URL=https://api.anthropic.com/",
"CODER_AIBRIDGE_PROVIDER_0_DUMP_DIR=/tmp/aibridge-dump",
}, },
expected: []codersdk.AIProviderConfig{ expected: []codersdk.AIProviderConfig{
{ {
@@ -42,7 +43,6 @@ func TestReadAIProvidersFromEnv(t *testing.T) {
Name: "anthropic-zdr", Name: "anthropic-zdr",
Keys: []string{"sk-ant-xxx"}, Keys: []string{"sk-ant-xxx"},
BaseURL: "https://api.anthropic.com/", BaseURL: "https://api.anthropic.com/",
DumpDir: "/tmp/aibridge-dump",
}, },
}, },
}, },
@@ -537,3 +537,52 @@ func TestValidateLegacyAIBridgeConfig(t *testing.T) {
}) })
} }
} }
// TestBuildAIProviderFromRowSetsAPIDumpDir runs buildAIProviderFromRow
// against an OpenAI, an Anthropic, and a Copilot provider row and checks
// that each call succeeds and that the returned provider's APIDumpDir()
// equals the APIDumpDir value set on the AIBridgeConfig passed in.
func TestBuildAIProviderFromRowSetsAPIDumpDir(t *testing.T) {
	t.Parallel()

	const dumpDir = "/tmp/coder-aibridge-dumps"

	// One case per provider type exercised by this test.
	type testCase struct {
		name string
		row  database.AIProvider
	}
	cases := []testCase{
		{
			name: "OpenAI",
			row: database.AIProvider{
				Type:    database.AiProviderTypeOpenai,
				Name:    "openai",
				BaseUrl: "https://api.openai.com/",
			},
		},
		{
			name: "Anthropic",
			row: database.AIProvider{
				Type:    database.AiProviderTypeAnthropic,
				Name:    "anthropic",
				BaseUrl: "https://api.anthropic.com/",
			},
		},
		{
			name: "Copilot",
			row: database.AIProvider{
				Type:    database.AiProviderTypeCopilot,
				Name:    "copilot",
				BaseUrl: "https://api.githubcopilot.com/",
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			// Each subtest builds its own config value; only the
			// dump directory and BYOK flag are populated.
			cfg := codersdk.AIBridgeConfig{
				AllowBYOK:  serpent.Bool(true),
				APIDumpDir: serpent.String(dumpDir),
			}

			built, err := buildAIProviderFromRow(tc.row, nil, cfg)
			require.NoError(t, err)
			assert.Equal(t, dumpDir, built.APIDumpDir())
		})
	}
}
+6
View File
@@ -113,6 +113,12 @@ AI GATEWAY OPTIONS:
with AI budgets. "highest" selects the group with the largest spend with AI budgets. "highest" selects the group with the largest spend
limit, and is currently the only supported value. limit, and is currently the only supported value.
--ai-gateway-dump-dir string, $CODER_AI_GATEWAY_DUMP_DIR
Base directory for dumping AI Bridge request/response pairs to disk
for debugging. When set, each provider writes under a subdirectory
named after the provider. Sensitive headers are redacted. Leave empty
to disable.
--ai-gateway-allow-byok bool, $CODER_AI_GATEWAY_ALLOW_BYOK (default: true) --ai-gateway-allow-byok bool, $CODER_AI_GATEWAY_ALLOW_BYOK (default: true)
Allow users to provide their own LLM API keys or subscriptions. When Allow users to provide their own LLM API keys or subscriptions. When
disabled, only centralized key authentication is permitted. disabled, only centralized key authentication is permitted.
+5
View File
@@ -920,6 +920,11 @@ ai_gateway:
# X-Ai-Bridge-Actor-Metadata-Username (their username). # X-Ai-Bridge-Actor-Metadata-Username (their username).
# (default: false, type: bool) # (default: false, type: bool)
send_actor_headers: false send_actor_headers: false
# Base directory for dumping AI Bridge request/response pairs to disk for
# debugging. When set, each provider writes under a subdirectory named after the
# provider. Sensitive headers are redacted. Leave empty to disable.
# (default: <unset>, type: string)
api_dump_dir: ""
# Allow users to provide their own LLM API keys or subscriptions. When disabled, # Allow users to provide their own LLM API keys or subscriptions. When disabled,
# only centralized key authentication is permitted. # only centralized key authentication is permitted.
# (default: true, type: bool) # (default: true, type: bool)
+5
View File
@@ -292,6 +292,11 @@ func providersFromEnv(ctx context.Context, cfg codersdk.AIBridgeConfig, logger s
Type: database.AiProviderTypeAnthropic, Type: database.AiProviderTypeAnthropic,
} }
if hasLegacyBedrock { if hasLegacyBedrock {
if hasAnthropicKey {
logger.Warn(ctx, "ignoring legacy Anthropic API key because Bedrock credentials are configured; Bedrock authenticates via access keys or credential chain",
slog.F("provider", aibridge.ProviderAnthropic),
)
}
// Bedrock-only deployments use CODER_AIBRIDGE_BEDROCK_BASE_URL // Bedrock-only deployments use CODER_AIBRIDGE_BEDROCK_BASE_URL
// for custom VPC, FIPS, or proxy endpoints. // for custom VPC, FIPS, or proxy endpoints.
dp.BaseURL = cfg.LegacyBedrock.BaseURL.String() dp.BaseURL = cfg.LegacyBedrock.BaseURL.String()
+4 -4
View File
@@ -14450,6 +14450,10 @@ const docTemplate = `{
} }
] ]
}, },
"api_dump_dir": {
"description": "APIDumpDir is the base directory under which each provider's\nrequest/response dumps are written, in a subdirectory named after\nthe provider. Empty disables dumping.",
"type": "string"
},
"bedrock": { "bedrock": {
"description": "Deprecated: Use Providers with indexed CODER_AI_GATEWAY_PROVIDER_\u003cN\u003e_* env vars instead.", "description": "Deprecated: Use Providers with indexed CODER_AI_GATEWAY_PROVIDER_\u003cN\u003e_* env vars instead.",
"allOf": [ "allOf": [
@@ -15062,10 +15066,6 @@ const docTemplate = `{
"bedrock_small_fast_model": { "bedrock_small_fast_model": {
"type": "string" "type": "string"
}, },
"dump_dir": {
"description": "DumpDir is the directory path for dumping API requests and responses.",
"type": "string"
},
"name": { "name": {
"description": "Name is the unique instance identifier used for routing.\nDefaults to Type if not provided.", "description": "Name is the unique instance identifier used for routing.\nDefaults to Type if not provided.",
"type": "string" "type": "string"
+4 -4
View File
@@ -12854,6 +12854,10 @@
} }
] ]
}, },
"api_dump_dir": {
"description": "APIDumpDir is the base directory under which each provider's\nrequest/response dumps are written, in a subdirectory named after\nthe provider. Empty disables dumping.",
"type": "string"
},
"bedrock": { "bedrock": {
"description": "Deprecated: Use Providers with indexed CODER_AI_GATEWAY_PROVIDER_\u003cN\u003e_* env vars instead.", "description": "Deprecated: Use Providers with indexed CODER_AI_GATEWAY_PROVIDER_\u003cN\u003e_* env vars instead.",
"allOf": [ "allOf": [
@@ -13466,10 +13470,6 @@
"bedrock_small_fast_model": { "bedrock_small_fast_model": {
"type": "string" "type": "string"
}, },
"dump_dir": {
"description": "DumpDir is the directory path for dumping API requests and responses.",
"type": "string"
},
"name": { "name": {
"description": "Name is the unique instance identifier used for routing.\nDefaults to Type if not provided.", "description": "Name is the unique instance identifier used for routing.\nDefaults to Type if not provided.",
"type": "string" "type": "string"
+1
View File
@@ -627,6 +627,7 @@ var (
rbac.ResourceAibridgeInterception.Type: {policy.ActionCreate, policy.ActionRead, policy.ActionUpdate, policy.ActionDelete}, rbac.ResourceAibridgeInterception.Type: {policy.ActionCreate, policy.ActionRead, policy.ActionUpdate, policy.ActionDelete},
rbac.ResourceAiModelPrice.Type: {policy.ActionUpdate}, // Required for the startup price seeder. rbac.ResourceAiModelPrice.Type: {policy.ActionUpdate}, // Required for the startup price seeder.
rbac.ResourceAiSeat.Type: {policy.ActionCreate}, // Required for UpsertAISeatState. rbac.ResourceAiSeat.Type: {policy.ActionCreate}, // Required for UpsertAISeatState.
rbac.ResourceAIProvider.Type: {policy.ActionRead}, // Required to load the provider snapshot (and per-provider keys) at startup.
}), }),
User: []rbac.Permission{}, User: []rbac.Permission{},
ByOrgID: map[string]rbac.OrgPermissions{}, ByOrgID: map[string]rbac.OrgPermissions{},
+15 -2
View File
@@ -1863,6 +1863,16 @@ func (c *DeploymentValues) Options() serpent.OptionSet {
Group: &deploymentGroupAIGateway, Group: &deploymentGroupAIGateway,
YAML: "structured_logging", YAML: "structured_logging",
} }
aiGatewayAPIDumpDir := serpent.Option{
Name: "AI Gateway API Dump Directory",
Description: "Base directory for dumping AI Bridge request/response pairs to disk for debugging. When set, each provider writes under a subdirectory named after the provider. Sensitive headers are redacted. Leave empty to disable.",
Flag: "ai-gateway-dump-dir",
Env: "CODER_AI_GATEWAY_DUMP_DIR",
Value: &c.AI.BridgeConfig.APIDumpDir,
Default: "",
Group: &deploymentGroupAIGateway,
YAML: "api_dump_dir",
}
aiGatewaySendActorHeaders := serpent.Option{ aiGatewaySendActorHeaders := serpent.Option{
Name: "AI Gateway Send Actor Headers", Name: "AI Gateway Send Actor Headers",
Description: "Once enabled, extra headers will be added to upstream requests to identify the user (actor) making requests to AI Gateway. " + Description: "Once enabled, extra headers will be added to upstream requests to identify the user (actor) making requests to AI Gateway. " +
@@ -4275,6 +4285,7 @@ Write out the current server config as YAML to stdout.`,
UseInstead: serpent.OptionSet{aiGatewaySendActorHeaders}, UseInstead: serpent.OptionSet{aiGatewaySendActorHeaders},
}, },
aiGatewaySendActorHeaders, aiGatewaySendActorHeaders,
aiGatewayAPIDumpDir,
{ {
Name: "AI Bridge Allow BYOK", Name: "AI Bridge Allow BYOK",
Description: "Deprecated: use --ai-gateway-allow-byok or CODER_AI_GATEWAY_ALLOW_BYOK instead. Allow users to provide their own LLM API keys or subscriptions. When disabled, only centralized key authentication is permitted.", Description: "Deprecated: use --ai-gateway-allow-byok or CODER_AI_GATEWAY_ALLOW_BYOK instead. Allow users to provide their own LLM API keys or subscriptions. When disabled, only centralized key authentication is permitted.",
@@ -4632,6 +4643,10 @@ type AIBridgeConfig struct {
CircuitBreakerInterval serpent.Duration `json:"circuit_breaker_interval" typescript:",notnull"` CircuitBreakerInterval serpent.Duration `json:"circuit_breaker_interval" typescript:",notnull"`
CircuitBreakerTimeout serpent.Duration `json:"circuit_breaker_timeout" typescript:",notnull"` CircuitBreakerTimeout serpent.Duration `json:"circuit_breaker_timeout" typescript:",notnull"`
CircuitBreakerMaxRequests serpent.Int64 `json:"circuit_breaker_max_requests" typescript:",notnull"` CircuitBreakerMaxRequests serpent.Int64 `json:"circuit_breaker_max_requests" typescript:",notnull"`
// APIDumpDir is the base directory under which each provider's
// request/response dumps are written, in a subdirectory named after
// the provider. Empty disables dumping.
APIDumpDir serpent.String `json:"api_dump_dir" typescript:",notnull"`
} }
type AIBridgeOpenAIConfig struct { type AIBridgeOpenAIConfig struct {
@@ -4669,8 +4684,6 @@ type AIProviderConfig struct {
Keys []string `json:"-"` Keys []string `json:"-"`
// BaseURL is the base URL of the upstream provider API. // BaseURL is the base URL of the upstream provider API.
BaseURL string `json:"base_url"` BaseURL string `json:"base_url"`
// DumpDir is the directory path for dumping API requests and responses.
DumpDir string `json:"dump_dir,omitempty"`
// Bedrock fields (only applicable when Type == "anthropic"). // Bedrock fields (only applicable when Type == "anthropic").
BedrockBaseURL string `json:"-"` BedrockBaseURL string `json:"-"`
+34 -9
View File
@@ -219,19 +219,11 @@ requests to `/api/v2/aibridge/<NAME>/` to target a specific instance:
**Supported keys per provider:** **Supported keys per provider:**
| Key | Required | Description | | Key | Required | Description |
|------------|----------|-------------------------------------------------------| |------------|----------|------------------------------------------------------|
| `TYPE` | Yes | Provider type: `openai`, `anthropic`, or `copilot` | | `TYPE` | Yes | Provider type: `openai`, `anthropic`, or `copilot` |
| `NAME` | No | Unique instance name for routing. Defaults to `TYPE` | | `NAME` | No | Unique instance name for routing. Defaults to `TYPE` |
| `KEY` | No | API key for upstream authentication (alias: `KEYS`) | | `KEY` | No | API key for upstream authentication (alias: `KEYS`) |
| `BASE_URL` | No | Base URL of the upstream API | | `BASE_URL` | No | Base URL of the upstream API |
| `DUMP_DIR` | No | Directory for provider API request and response dumps |
> [!WARNING]
> `DUMP_DIR` is not intended for regular use. Setting this option
> results in a high number of writes. Dump files contain raw request and
> response data, which may include proprietary or sensitive information
> (prompts, completions, tool inputs). Enable only briefly for diagnostic
> purposes and protect the target directory.
For `anthropic` providers using AWS Bedrock, the following keys are also For `anthropic` providers using AWS Bedrock, the following keys are also
available: `BEDROCK_BASE_URL`, `BEDROCK_REGION`, available: `BEDROCK_BASE_URL`, `BEDROCK_REGION`,
@@ -251,6 +243,39 @@ available: `BEDROCK_BASE_URL`, `BEDROCK_REGION`,
> will produce a startup error. Remove one or the other to resolve the > will produce a startup error. Remove one or the other to resolve the
> conflict. > conflict.
## API Dumps
AI Gateway can dump provider request and response pairs to disk for debugging.
Configure the dump directory with `--ai-gateway-dump-dir` or
`CODER_AI_GATEWAY_DUMP_DIR`:
```sh
coder server --ai-gateway-dump-dir=/var/lib/coder/ai-gateway-dumps
```
Or in YAML:
```yaml
ai_gateway:
api_dump_dir: /var/lib/coder/ai-gateway-dumps
```
This top-level setting replaces the previous per-provider `DUMP_DIR` field.
For each provider, AI Gateway writes dumps under `<base>/<provider_name>`, where
`<base>` is the configured dump directory and `<provider_name>` is the provider
instance name used in the route. For example, a provider named `anthropic-corp`
with `/var/lib/coder/ai-gateway-dumps` configured writes to
`/var/lib/coder/ai-gateway-dumps/anthropic-corp`.
Sensitive headers are redacted before dumps are written. Leave the value empty
to disable dumping.
> [!WARNING]
> API dumps are intended for short diagnostic sessions only. Dump files contain
> raw request and response data, which may include proprietary or sensitive
> information such as prompts, completions, and tool inputs. Protect the target
> directory and disable dumping when diagnostics are complete.
## Data Retention ## Data Retention
AI Gateway records prompts, token usage, tool invocations, and model reasoning for auditing and AI Gateway records prompts, token usage, tool invocations, and model reasoning for auditing and
+1 -1
View File
@@ -185,6 +185,7 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \
"base_url": "string", "base_url": "string",
"key": "string" "key": "string"
}, },
"api_dump_dir": "string",
"bedrock": { "bedrock": {
"access_key": "string", "access_key": "string",
"access_key_secret": "string", "access_key_secret": "string",
@@ -213,7 +214,6 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \
"bedrock_model": "string", "bedrock_model": "string",
"bedrock_region": "string", "bedrock_region": "string",
"bedrock_small_fast_model": "string", "bedrock_small_fast_model": "string",
"dump_dir": "string",
"name": "string", "name": "string",
"type": "string" "type": "string"
} }
+5 -6
View File
@@ -442,6 +442,7 @@
"base_url": "string", "base_url": "string",
"key": "string" "key": "string"
}, },
"api_dump_dir": "string",
"bedrock": { "bedrock": {
"access_key": "string", "access_key": "string",
"access_key_secret": "string", "access_key_secret": "string",
@@ -470,7 +471,6 @@
"bedrock_model": "string", "bedrock_model": "string",
"bedrock_region": "string", "bedrock_region": "string",
"bedrock_small_fast_model": "string", "bedrock_small_fast_model": "string",
"dump_dir": "string",
"name": "string", "name": "string",
"type": "string" "type": "string"
} }
@@ -488,6 +488,7 @@
|-------------------------------------|----------------------------------------------------------------------|----------|--------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |-------------------------------------|----------------------------------------------------------------------|----------|--------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `allow_byok` | boolean | false | | | | `allow_byok` | boolean | false | | |
| `anthropic` | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false | | Deprecated: Use Providers with indexed CODER_AI_GATEWAY_PROVIDER_<N>_* env vars instead. | | `anthropic` | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false | | Deprecated: Use Providers with indexed CODER_AI_GATEWAY_PROVIDER_<N>_* env vars instead. |
| `api_dump_dir` | string | false | | Api dump dir is the base directory under which each provider's request/response dumps are written, in a subdirectory named after the provider. Empty disables dumping. |
| `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | Deprecated: Use Providers with indexed CODER_AI_GATEWAY_PROVIDER_<N>_* env vars instead. | | `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | Deprecated: Use Providers with indexed CODER_AI_GATEWAY_PROVIDER_<N>_* env vars instead. |
| `budget_period` | string | false | | | | `budget_period` | string | false | | |
| `budget_policy` | string | false | | Budget settings for AI Governance cost controls. | | `budget_policy` | string | false | | Budget settings for AI Governance cost controls. |
@@ -1245,6 +1246,7 @@
"base_url": "string", "base_url": "string",
"key": "string" "key": "string"
}, },
"api_dump_dir": "string",
"bedrock": { "bedrock": {
"access_key": "string", "access_key": "string",
"access_key_secret": "string", "access_key_secret": "string",
@@ -1273,7 +1275,6 @@
"bedrock_model": "string", "bedrock_model": "string",
"bedrock_region": "string", "bedrock_region": "string",
"bedrock_small_fast_model": "string", "bedrock_small_fast_model": "string",
"dump_dir": "string",
"name": "string", "name": "string",
"type": "string" "type": "string"
} }
@@ -1344,7 +1345,6 @@
"bedrock_model": "string", "bedrock_model": "string",
"bedrock_region": "string", "bedrock_region": "string",
"bedrock_small_fast_model": "string", "bedrock_small_fast_model": "string",
"dump_dir": "string",
"name": "string", "name": "string",
"type": "string" "type": "string"
} }
@@ -1358,7 +1358,6 @@
| `bedrock_model` | string | false | | | | `bedrock_model` | string | false | | |
| `bedrock_region` | string | false | | | | `bedrock_region` | string | false | | |
| `bedrock_small_fast_model` | string | false | | | | `bedrock_small_fast_model` | string | false | | |
| `dump_dir` | string | false | | Dump dir is the directory path for dumping API requests and responses. |
| `name` | string | false | | Name is the unique instance identifier used for routing. Defaults to Type if not provided. | | `name` | string | false | | Name is the unique instance identifier used for routing. Defaults to Type if not provided. |
| `type` | string | false | | Type is the provider type: "openai", "anthropic", or "copilot". | | `type` | string | false | | Type is the provider type: "openai", "anthropic", or "copilot". |
@@ -5706,6 +5705,7 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o
"base_url": "string", "base_url": "string",
"key": "string" "key": "string"
}, },
"api_dump_dir": "string",
"bedrock": { "bedrock": {
"access_key": "string", "access_key": "string",
"access_key_secret": "string", "access_key_secret": "string",
@@ -5734,7 +5734,6 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o
"bedrock_model": "string", "bedrock_model": "string",
"bedrock_region": "string", "bedrock_region": "string",
"bedrock_small_fast_model": "string", "bedrock_small_fast_model": "string",
"dump_dir": "string",
"name": "string", "name": "string",
"type": "string" "type": "string"
} }
@@ -6305,6 +6304,7 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o
"base_url": "string", "base_url": "string",
"key": "string" "key": "string"
}, },
"api_dump_dir": "string",
"bedrock": { "bedrock": {
"access_key": "string", "access_key": "string",
"access_key_secret": "string", "access_key_secret": "string",
@@ -6333,7 +6333,6 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o
"bedrock_model": "string", "bedrock_model": "string",
"bedrock_region": "string", "bedrock_region": "string",
"bedrock_small_fast_model": "string", "bedrock_small_fast_model": "string",
"dump_dir": "string",
"name": "string", "name": "string",
"type": "string" "type": "string"
} }
+10
View File
@@ -1889,6 +1889,16 @@ Emit structured logs for AI Gateway interception records. Use this for exporting
Once enabled, extra headers will be added to upstream requests to identify the user (actor) making requests to AI Gateway. This is only needed if you are using a proxy between AI Gateway and an upstream AI provider. This will send X-Ai-Bridge-Actor-Id (the ID of the user making the request) and X-Ai-Bridge-Actor-Metadata-Username (their username). Once enabled, extra headers will be added to upstream requests to identify the user (actor) making requests to AI Gateway. This is only needed if you are using a proxy between AI Gateway and an upstream AI provider. This will send X-Ai-Bridge-Actor-Id (the ID of the user making the request) and X-Ai-Bridge-Actor-Metadata-Username (their username).
### --ai-gateway-dump-dir
| | |
|-------------|-----------------------------------------|
| Type | <code>string</code> |
| Environment | <code>$CODER_AI_GATEWAY_DUMP_DIR</code> |
| YAML | <code>ai_gateway.api_dump_dir</code> |
Base directory for dumping AI Bridge request/response pairs to disk for debugging. When set, each provider writes under a subdirectory named after the provider. Sensitive headers are redacted. Leave empty to disable.
### --ai-gateway-allow-byok ### --ai-gateway-allow-byok
| | | | | |
+12 -24
View File
@@ -6,11 +6,8 @@ import (
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/coder/coder/v2/aibridge" "github.com/coder/coder/v2/aibridge"
agplcli "github.com/coder/coder/v2/cli"
"github.com/coder/coder/v2/codersdk"
) )
func TestDomainsFromProviders(t *testing.T) { func TestDomainsFromProviders(t *testing.T) {
@@ -19,14 +16,11 @@ func TestDomainsFromProviders(t *testing.T) {
t.Run("ExtractsHostnames", func(t *testing.T) { t.Run("ExtractsHostnames", func(t *testing.T) {
t.Parallel() t.Parallel()
providers, err := agplcli.BuildProviders(codersdk.AIBridgeConfig{ providers := []aibridge.Provider{
Providers: []codersdk.AIProviderConfig{ aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{Name: "openai", BaseURL: "https://api.openai.com/v1/"}),
{Type: aibridge.ProviderOpenAI, Name: "openai", Keys: []string{"k"}}, aibridge.NewAnthropicProvider(aibridge.AnthropicConfig{Name: "anthropic", BaseURL: "https://api.anthropic.com/"}, nil),
{Type: aibridge.ProviderAnthropic, Name: "anthropic", Keys: []string{"k"}}, aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{Name: "custom", BaseURL: "https://custom-llm.example.com:8443/api"}),
{Type: aibridge.ProviderOpenAI, Name: "custom", Keys: []string{"k"}, BaseURL: "https://custom-llm.example.com:8443/api"}, }
},
})
require.NoError(t, err)
domains, mapping := domainsFromProviders(providers) domains, mapping := domainsFromProviders(providers)
@@ -43,13 +37,10 @@ func TestDomainsFromProviders(t *testing.T) {
t.Run("DeduplicatesSameHost", func(t *testing.T) { t.Run("DeduplicatesSameHost", func(t *testing.T) {
t.Parallel() t.Parallel()
providers, err := agplcli.BuildProviders(codersdk.AIBridgeConfig{ providers := []aibridge.Provider{
Providers: []codersdk.AIProviderConfig{ aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{Name: "first", BaseURL: "https://api.example.com/v1"}),
{Type: aibridge.ProviderOpenAI, Name: "first", Keys: []string{"k"}, BaseURL: "https://api.example.com/v1"}, aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{Name: "second", BaseURL: "https://api.example.com/v2"}),
{Type: aibridge.ProviderOpenAI, Name: "second", Keys: []string{"k"}, BaseURL: "https://api.example.com/v2"}, }
},
})
require.NoError(t, err)
domains, mapping := domainsFromProviders(providers) domains, mapping := domainsFromProviders(providers)
@@ -68,12 +59,9 @@ func TestDomainsFromProviders(t *testing.T) {
t.Run("CaseInsensitive", func(t *testing.T) { t.Run("CaseInsensitive", func(t *testing.T) {
t.Parallel() t.Parallel()
providers, err := agplcli.BuildProviders(codersdk.AIBridgeConfig{ providers := []aibridge.Provider{
Providers: []codersdk.AIProviderConfig{ aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{Name: "provider", BaseURL: "https://API.Example.COM/v1"}),
{Type: aibridge.ProviderOpenAI, Name: "provider", Keys: []string{"k"}, BaseURL: "https://API.Example.COM/v1"}, }
},
})
require.NoError(t, err)
domains, mapping := domainsFromProviders(providers) domains, mapping := domainsFromProviders(providers)
+1 -1
View File
@@ -167,7 +167,7 @@ func (r *RootCmd) Server(_ func()) *serpent.Command {
// in-memory roundtripper regardless of license); only the proxy // in-memory roundtripper regardless of license); only the proxy
// daemon remains enterprise-gated by config. // daemon remains enterprise-gated by config.
if options.DeploymentValues.AI.BridgeProxyConfig.Enabled.Value() { if options.DeploymentValues.AI.BridgeProxyConfig.Enabled.Value() {
providers, err := agplcli.BuildProviders(options.DeploymentValues.AI.BridgeConfig) providers, err := agplcli.BuildProviders(ctx, options.Database, options.DeploymentValues.AI.BridgeConfig, options.Logger.Named("aibridge.providers"))
if err != nil { if err != nil {
return nil, nil, xerrors.Errorf("build AI providers: %w", err) return nil, nil, xerrors.Errorf("build AI providers: %w", err)
} }
+6
View File
@@ -114,6 +114,12 @@ AI GATEWAY OPTIONS:
with AI budgets. "highest" selects the group with the largest spend with AI budgets. "highest" selects the group with the largest spend
limit, and is currently the only supported value. limit, and is currently the only supported value.
--ai-gateway-dump-dir string, $CODER_AI_GATEWAY_DUMP_DIR
Base directory for dumping AI Bridge request/response pairs to disk
for debugging. When set, each provider writes under a subdirectory
named after the provider. Sensitive headers are redacted. Leave empty
to disable.
--ai-gateway-allow-byok bool, $CODER_AI_GATEWAY_ALLOW_BYOK (default: true) --ai-gateway-allow-byok bool, $CODER_AI_GATEWAY_ALLOW_BYOK (default: true)
Allow users to provide their own LLM API keys or subscriptions. When Allow users to provide their own LLM API keys or subscriptions. When
disabled, only centralized key authentication is permitted. disabled, only centralized key authentication is permitted.
+6 -4
View File
@@ -82,6 +82,12 @@ export interface AIBridgeConfig {
readonly circuit_breaker_interval: number; readonly circuit_breaker_interval: number;
readonly circuit_breaker_timeout: number; readonly circuit_breaker_timeout: number;
readonly circuit_breaker_max_requests: number; readonly circuit_breaker_max_requests: number;
/**
* APIDumpDir is the base directory under which each provider's
* request/response dumps are written, in a subdirectory named after
* the provider. Empty disables dumping.
*/
readonly api_dump_dir: string;
} }
// From codersdk/aibridge.go // From codersdk/aibridge.go
@@ -384,10 +390,6 @@ export interface AIProviderConfig {
* BaseURL is the base URL of the upstream provider API. * BaseURL is the base URL of the upstream provider API.
*/ */
readonly base_url: string; readonly base_url: string;
/**
* DumpDir is the directory path for dumping API requests and responses.
*/
readonly dump_dir?: string;
readonly bedrock_region?: string; readonly bedrock_region?: string;
readonly bedrock_model?: string; readonly bedrock_model?: string;
readonly bedrock_small_fast_model?: string; readonly bedrock_small_fast_model?: string;