diff --git a/cli/aibridged.go b/cli/aibridged.go index 42bcc6d3d5..8bb21a8cbf 100644 --- a/cli/aibridged.go +++ b/cli/aibridged.go @@ -5,15 +5,21 @@ package cli import ( "context" + "github.com/google/uuid" "github.com/prometheus/client_golang/prometheus" "golang.org/x/xerrors" + "cdr.dev/slog/v3" "github.com/coder/coder/v2/aibridge" "github.com/coder/coder/v2/aibridge/config" "github.com/coder/coder/v2/aibridge/keypool" "github.com/coder/coder/v2/coderd" "github.com/coder/coder/v2/coderd/aibridged" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/db2sdk" + "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/tracing" + "github.com/coder/coder/v2/coderd/util/ptr" "github.com/coder/coder/v2/codersdk" "github.com/coder/quartz" ) @@ -44,183 +50,200 @@ func newAIBridgeDaemon(coderAPI *coderd.API, providers []aibridge.Provider) (*ai return srv, nil } -// BuildProviders constructs the list of AI providers from config. -// It merges legacy single-provider env vars and indexed provider configs: -// 1. Legacy providers (from CODER_AI_GATEWAY_OPENAI_KEY, etc.) are added first. -// If a legacy name conflicts with an indexed provider, startup fails with -// a clear error asking the admin to remove one or the other. -// 2. Indexed providers (from CODER_AI_GATEWAY_PROVIDER__*) are added next. -func BuildProviders(cfg codersdk.AIBridgeConfig) ([]aibridge.Provider, error) { - var cbConfig *config.CircuitBreaker - if cfg.CircuitBreakerEnabled.Value() { - cbConfig = &config.CircuitBreaker{ - FailureThreshold: uint32(cfg.CircuitBreakerFailureThreshold.Value()), //nolint:gosec // Validated by serpent.Validate in deployment options. - Interval: cfg.CircuitBreakerInterval.Value(), - Timeout: cfg.CircuitBreakerTimeout.Value(), - MaxRequests: uint32(cfg.CircuitBreakerMaxRequests.Value()), //nolint:gosec // Validated by serpent.Validate in deployment options. 
+// BuildProviders loads every enabled ai_providers row, attaches its +// keys, and constructs the equivalent [aibridge.Provider] instances. +// The database is the single source of truth for runtime provider +// configuration. +// +// Per-provider construction errors are logged and the offending row is +// excluded from the returned snapshot; only a failure of the DB query +// itself is propagated. This keeps a single misconfigured row from +// taking the whole daemon down. +func BuildProviders(ctx context.Context, db database.Store, cfg codersdk.AIBridgeConfig, logger slog.Logger) ([]aibridge.Provider, error) { + //nolint:gocritic // AsAIBridged has a minimal permission set for this purpose. + authCtx := dbauthz.AsAIBridged(ctx) + + var rows []database.AIProvider + keysByProvider := make(map[uuid.UUID][]database.AIProviderKey) + + // Wrap both queries in a read-only transaction so the provider list + // and the key list are consistent with each other. + err := db.InTx(func(tx database.Store) error { + var err error + rows, err = tx.GetAIProviders(authCtx, database.GetAIProvidersParams{ + IncludeDisabled: false, + }) + if err != nil { + return xerrors.Errorf("load ai providers: %w", err) } + + if len(rows) == 0 { + return nil + } + + // Load keys only for the enabled providers to avoid materializing + // secrets for disabled rows. + ids := make([]uuid.UUID, len(rows)) + for i, r := range rows { + ids[i] = r.ID + } + keyRows, err := tx.GetAIProviderKeysByProviderIDs(authCtx, ids) + if err != nil { + return xerrors.Errorf("load ai provider keys: %w", err) + } + for _, k := range keyRows { + keysByProvider[k.ProviderID] = append(keysByProvider[k.ProviderID], k) + } + return nil + }, &database.TxOptions{ReadOnly: true, TxIdentifier: "build_ai_providers"}) + if err != nil { + return nil, err } - var providers []aibridge.Provider - usedNames := make(map[string]struct{}) - - // Collect names from indexed providers so we can detect conflicts - // with legacy providers. 
- for _, p := range cfg.Providers { - name := p.Name - if name == "" { - name = p.Type + out := make([]aibridge.Provider, 0, len(rows)) + for _, row := range rows { + prov, err := buildAIProviderFromRow(row, keysByProvider[row.ID], cfg) + if err != nil { + logger.Error(ctx, "skipping misconfigured ai provider", + slog.F("provider_id", row.ID), + slog.F("provider_name", row.Name), + slog.F("provider_type", string(row.Type)), + slog.Error(err), + ) + continue } - usedNames[name] = struct{}{} + out = append(out, prov) } - // Add legacy OpenAI provider if configured. - if cfg.LegacyOpenAI.Key.String() != "" { - if _, conflict := usedNames[aibridge.ProviderOpenAI]; conflict { - return nil, xerrors.Errorf("legacy CODER_AI_GATEWAY_OPENAI_KEY (or CODER_AIBRIDGE_OPENAI_KEY) conflicts with indexed provider named %q; remove one or the other", aibridge.ProviderOpenAI) - } - providers = append(providers, aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{ - Name: aibridge.ProviderOpenAI, - BaseURL: cfg.LegacyOpenAI.BaseURL.String(), - Key: cfg.LegacyOpenAI.Key.String(), - CircuitBreaker: cbConfig, - SendActorHeaders: cfg.SendActorHeaders.Value(), - })) - usedNames[aibridge.ProviderOpenAI] = struct{}{} + if len(rows) > 0 && len(out) == 0 { + logger.Warn(ctx, "all enabled ai providers failed to build; daemon will start with zero providers") } - // Add legacy Anthropic provider if configured. Bedrock credentials - // alone are sufficient, an Anthropic API key is not required when - // using AWS Bedrock. 
- if cfg.LegacyAnthropic.Key.String() != "" || getBedrockConfig(cfg.LegacyBedrock) != nil { - if _, conflict := usedNames[aibridge.ProviderAnthropic]; conflict { - return nil, xerrors.Errorf("legacy CODER_AI_GATEWAY_ANTHROPIC_KEY (or CODER_AIBRIDGE_ANTHROPIC_KEY) conflicts with indexed provider named %q; remove one or the other", aibridge.ProviderAnthropic) + return out, nil +} + +// buildAIProviderFromRow decodes the settings blob and constructs the +// appropriate [aibridge.Provider] for a single ai_providers row. +func buildAIProviderFromRow( + row database.AIProvider, + keys []database.AIProviderKey, + cfg codersdk.AIBridgeConfig, +) (aibridge.Provider, error) { + settings, err := db2sdk.AIProviderSettings(row.Settings) + if err != nil { + return nil, xerrors.Errorf("decode settings: %w", err) + } + + cbCfg := circuitBreakerConfig(cfg) + sendActorHeaders := cfg.SendActorHeaders.Value() + dumpDir := cfg.APIDumpDir.Value() + + switch row.Type { + case database.AiProviderTypeOpenai: + if len(keys) == 0 && !cfg.AllowBYOK.Value() { + return nil, xerrors.New("openai provider has no api keys configured and BYOK is not enabled") } var pool *keypool.Pool - if key := cfg.LegacyAnthropic.Key.String(); key != "" { + if len(keys) > 0 { var err error - pool, err = keypool.New([]string{key}, quartz.NewReal()) + pool, err = buildAIProviderKeyPool(keys) if err != nil { - return nil, xerrors.Errorf("create legacy anthropic key pool: %w", err) + return nil, xerrors.Errorf("openai key pool: %w", err) } } - providers = append(providers, aibridge.NewAnthropicProvider(aibridge.AnthropicConfig{ - Name: aibridge.ProviderAnthropic, - BaseURL: cfg.LegacyAnthropic.BaseURL.String(), + return aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{ + Name: row.Name, + BaseURL: row.BaseUrl, KeyPool: pool, - CircuitBreaker: cbConfig, - SendActorHeaders: cfg.SendActorHeaders.Value(), - }, getBedrockConfig(cfg.LegacyBedrock))) - usedNames[aibridge.ProviderAnthropic] = struct{}{} - } + APIDumpDir: 
dumpDir, + CircuitBreaker: cbCfg, + SendActorHeaders: sendActorHeaders, + }), nil - // Add indexed providers. - for _, p := range cfg.Providers { - name := p.Name - if name == "" { - name = p.Type + case database.AiProviderTypeAnthropic: + bedrock := bedrockConfigFromRow(row, settings) + // Bedrock-backed Anthropic authenticates via AWS credentials in + // the settings blob, not the api_keys table. A bearer-token + // Anthropic without any key cannot make upstream calls. + if bedrock == nil && len(keys) == 0 && !cfg.AllowBYOK.Value() { + return nil, xerrors.New("anthropic provider has no api keys, no bedrock credentials, and BYOK is not enabled") } - switch p.Type { - case aibridge.ProviderOpenAI: - var pool *keypool.Pool - if len(p.Keys) > 0 { - var err error - pool, err = keypool.New(p.Keys, quartz.NewReal()) - if err != nil { - return nil, xerrors.Errorf("create openai key pool for provider %q: %w", name, err) - } + var pool *keypool.Pool + if len(keys) > 0 { + var err error + pool, err = buildAIProviderKeyPool(keys) + if err != nil { + return nil, xerrors.Errorf("anthropic key pool: %w", err) } - providers = append(providers, aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{ - Name: name, - BaseURL: p.BaseURL, - KeyPool: pool, - APIDumpDir: p.DumpDir, - CircuitBreaker: cbConfig, - SendActorHeaders: cfg.SendActorHeaders.Value(), - })) - case aibridge.ProviderAnthropic: - var pool *keypool.Pool - if len(p.Keys) > 0 { - var err error - pool, err = keypool.New(p.Keys, quartz.NewReal()) - if err != nil { - return nil, xerrors.Errorf("create anthropic key pool for provider %q: %w", name, err) - } - } - providers = append(providers, aibridge.NewAnthropicProvider(aibridge.AnthropicConfig{ - Name: name, - BaseURL: p.BaseURL, - KeyPool: pool, - APIDumpDir: p.DumpDir, - CircuitBreaker: cbConfig, - SendActorHeaders: cfg.SendActorHeaders.Value(), - }, bedrockConfigFromProvider(p))) - case aibridge.ProviderCopilot: - providers = append(providers, 
aibridge.NewCopilotProvider(aibridge.CopilotConfig{ - Name: name, - BaseURL: p.BaseURL, - APIDumpDir: p.DumpDir, - CircuitBreaker: cbConfig, - })) - default: - return nil, xerrors.Errorf("unknown provider type %q for provider %q", p.Type, name) } - } + return aibridge.NewAnthropicProvider(aibridge.AnthropicConfig{ + Name: row.Name, + BaseURL: row.BaseUrl, + KeyPool: pool, + APIDumpDir: dumpDir, + CircuitBreaker: cbCfg, + SendActorHeaders: sendActorHeaders, + }, bedrock), nil - return providers, nil + case database.AiProviderTypeCopilot: + // Copilot is always BYOK; the per-user token is supplied on each + // request via the Authorization header, so no keypool is built. + return aibridge.NewCopilotProvider(aibridge.CopilotConfig{ + Name: row.Name, + BaseURL: row.BaseUrl, + APIDumpDir: dumpDir, + CircuitBreaker: cbCfg, + }), nil + + default: + return nil, xerrors.Errorf("unsupported provider type: %q", row.Type) + } } -// bedrockConfigFromProvider converts Bedrock fields from an indexed -// AIProviderConfig into an aibridge AWSBedrockConfig. -// Returns nil if no Bedrock fields are set. -func bedrockConfigFromProvider(p codersdk.AIProviderConfig) *aibridge.AWSBedrockConfig { - // Currently, only the first key pair is used, if any. - // TODO(ssncferreira): pass a keypool.Pool instead. - var accessKey, accessKeySecret string - if len(p.BedrockAccessKeys) > 0 { - accessKey = p.BedrockAccessKeys[0] +// buildAIProviderKeyPool builds a [keypool.Pool]. Callers must check +// len(keys) > 0 first; keypool.New rejects empty input. 
+func buildAIProviderKeyPool(keys []database.AIProviderKey) (*keypool.Pool, error) { + raw := make([]string, 0, len(keys)) + for _, k := range keys { + raw = append(raw, k.APIKey) } - if len(p.BedrockAccessKeySecrets) > 0 { - accessKeySecret = p.BedrockAccessKeySecrets[0] - } - settings := codersdk.NewAIProviderBedrockSettings( - p.BedrockRegion, accessKey, accessKeySecret, - p.BedrockModel, p.BedrockSmallFastModel, - ) - if !codersdk.IsBedrockConfigured(p.BedrockBaseURL, settings) { + return keypool.New(raw, quartz.NewReal()) +} + +// bedrockConfigFromRow returns nil when the settings have no Bedrock +// discriminator or when the Bedrock fields are not actually configured. +// The provider row's BaseUrl is the generic upstream endpoint and is +// always non-empty, so it cannot serve as a Bedrock detection signal; +// gate on the settings blob alone via [codersdk.AIProviderBedrockSettings.IsConfigured]. +func bedrockConfigFromRow(row database.AIProvider, settings codersdk.AIProviderSettings) *aibridge.AWSBedrockConfig { + if settings.Bedrock == nil { return nil } + bedrockSettings := *settings.Bedrock + if !bedrockSettings.IsConfigured() { + return nil + } + accessKey := ptr.NilToEmpty(bedrockSettings.AccessKey) + accessKeySecret := ptr.NilToEmpty(bedrockSettings.AccessKeySecret) return &aibridge.AWSBedrockConfig{ - BaseURL: p.BedrockBaseURL, - Region: p.BedrockRegion, + BaseURL: row.BaseUrl, + Region: bedrockSettings.Region, AccessKey: accessKey, AccessKeySecret: accessKeySecret, - Model: p.BedrockModel, - SmallFastModel: p.BedrockSmallFastModel, + Model: bedrockSettings.Model, + SmallFastModel: bedrockSettings.SmallFastModel, } } -func getBedrockConfig(cfg codersdk.AIBridgeBedrockConfig) *aibridge.AWSBedrockConfig { - // codersdk.IsBedrockConfigured decides what counts as Bedrock; when - // it returns false, the AWS SDK default credential chain (env vars, - // shared config, IAM roles, etc.) is left to resolve credentials. 
- settings := codersdk.NewAIProviderBedrockSettings( - cfg.Region.String(), - cfg.AccessKey.String(), - cfg.AccessKeySecret.String(), - cfg.Model.String(), - cfg.SmallFastModel.String(), - ) - if !codersdk.IsBedrockConfigured(cfg.BaseURL.String(), settings) { +// circuitBreakerConfig returns nil when the breaker is disabled. +func circuitBreakerConfig(cfg codersdk.AIBridgeConfig) *config.CircuitBreaker { + if !cfg.CircuitBreakerEnabled.Value() { return nil } - - return &aibridge.AWSBedrockConfig{ - BaseURL: cfg.BaseURL.String(), - Region: cfg.Region.String(), - AccessKey: cfg.AccessKey.String(), - AccessKeySecret: cfg.AccessKeySecret.String(), - Model: cfg.Model.String(), - SmallFastModel: cfg.SmallFastModel.String(), + return &config.CircuitBreaker{ + FailureThreshold: uint32(cfg.CircuitBreakerFailureThreshold.Value()), //nolint:gosec // Validated by serpent.Validate in deployment options. + Interval: cfg.CircuitBreakerInterval.Value(), + Timeout: cfg.CircuitBreakerTimeout.Value(), + MaxRequests: uint32(cfg.CircuitBreakerMaxRequests.Value()), //nolint:gosec // Validated by serpent.Validate in deployment options. 
} } diff --git a/cli/aibridged_internal_test.go b/cli/aibridged_internal_test.go index 2c2651cc11..1f9f512678 100644 --- a/cli/aibridged_internal_test.go +++ b/cli/aibridged_internal_test.go @@ -3,23 +3,47 @@ package cli import ( + "database/sql" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "cdr.dev/slog/v3/sloggers/slogtest" "github.com/coder/coder/v2/aibridge" + "github.com/coder/coder/v2/coderd" agplaibridge "github.com/coder/coder/v2/coderd/aibridge" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbgen" + "github.com/coder/coder/v2/coderd/database/dbtestutil" "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/testutil" "github.com/coder/serpent" ) +// buildFromEnv exercises the same env-config-in/providers-out path that +// production uses on boot: SeedAIProvidersFromEnv writes the env-derived +// rows to the database, and BuildProviders reads them back as runtime +// [aibridge.Provider] instances. This keeps the existing TestBuildProviders +// table intact while reflecting the post-refactor flow where the database +// is the single source of truth. 
+func buildFromEnv(t *testing.T, cfg codersdk.AIBridgeConfig) ([]aibridge.Provider, error) { + t.Helper() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + logger := slogtest.Make(t, nil) + if err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, logger); err != nil { + return nil, err + } + return BuildProviders(ctx, db, cfg, logger) +} + func TestBuildProviders(t *testing.T) { t.Parallel() t.Run("EmptyConfig", func(t *testing.T) { t.Parallel() - providers, err := BuildProviders(codersdk.AIBridgeConfig{}) + providers, err := buildFromEnv(t, codersdk.AIBridgeConfig{}) require.NoError(t, err) assert.Empty(t, providers) }) @@ -30,7 +54,7 @@ func TestBuildProviders(t *testing.T) { cfg.LegacyOpenAI.Key = serpent.String("sk-openai") cfg.LegacyAnthropic.Key = serpent.String("sk-anthropic") - providers, err := BuildProviders(cfg) + providers, err := buildFromEnv(t, cfg) require.NoError(t, err) names := providerNames(providers) @@ -44,28 +68,29 @@ func TestBuildProviders(t *testing.T) { cfg := codersdk.AIBridgeConfig{ Providers: []codersdk.AIProviderConfig{ { - Type: aibridge.ProviderAnthropic, - Name: "anthropic-zdr", - Keys: []string{"sk-zdr"}, - DumpDir: "/tmp/anthropic-dump", + Type: aibridge.ProviderAnthropic, + Name: "anthropic-zdr", + Keys: []string{"sk-zdr"}, }, { Type: aibridge.ProviderOpenAI, Name: "openai-azure", Keys: []string{"sk-azure"}, BaseURL: "https://azure.openai.com", - DumpDir: "/tmp/openai-dump", }, }, } - providers, err := BuildProviders(cfg) + providers, err := buildFromEnv(t, cfg) require.NoError(t, err) + require.Len(t, providers, 2) - names := providerNames(providers) - assert.Equal(t, []string{"anthropic-zdr", "openai-azure"}, names) - assert.Equal(t, "/tmp/anthropic-dump", providers[0].APIDumpDir()) - assert.Equal(t, "/tmp/openai-dump", providers[1].APIDumpDir()) + byName := make(map[string]aibridge.Provider, len(providers)) + for _, p := range providers { + byName[p.Name()] = p + } + require.Contains(t, byName, 
"anthropic-zdr") + require.Contains(t, byName, "openai-azure") }) t.Run("LegacyOpenAIConflictsWithIndexed", func(t *testing.T) { @@ -77,9 +102,9 @@ func TestBuildProviders(t *testing.T) { } cfg.LegacyOpenAI.Key = serpent.String("sk-legacy") - _, err := BuildProviders(cfg) + _, err := buildFromEnv(t, cfg) require.Error(t, err) - assert.Contains(t, err.Error(), "conflicts with indexed provider") + assert.Contains(t, err.Error(), "conflicts with the legacy env var") }) t.Run("LegacyAnthropicConflictsWithIndexed", func(t *testing.T) { @@ -91,9 +116,9 @@ func TestBuildProviders(t *testing.T) { } cfg.LegacyAnthropic.Key = serpent.String("sk-legacy") - _, err := BuildProviders(cfg) + _, err := buildFromEnv(t, cfg) require.Error(t, err) - assert.Contains(t, err.Error(), "conflicts with indexed provider") + assert.Contains(t, err.Error(), "conflicts with the legacy env var") }) t.Run("MixedLegacyAndIndexed", func(t *testing.T) { @@ -106,7 +131,7 @@ func TestBuildProviders(t *testing.T) { cfg.LegacyOpenAI.Key = serpent.String("sk-openai") cfg.LegacyAnthropic.Key = serpent.String("sk-anthropic") - providers, err := BuildProviders(cfg) + providers, err := buildFromEnv(t, cfg) require.NoError(t, err) names := providerNames(providers) @@ -123,7 +148,7 @@ func TestBuildProviders(t *testing.T) { cfg.LegacyBedrock.AccessKey = serpent.String("AKID") cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret") - providers, err := BuildProviders(cfg) + providers, err := buildFromEnv(t, cfg) require.NoError(t, err) names := providerNames(providers) @@ -139,7 +164,7 @@ func TestBuildProviders(t *testing.T) { cfg.LegacyBedrock.AccessKey = serpent.String("AKID") cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret") - providers, err := BuildProviders(cfg) + providers, err := buildFromEnv(t, cfg) require.NoError(t, err) require.Len(t, providers, 1) @@ -150,15 +175,18 @@ func TestBuildProviders(t *testing.T) { t.Run("UnknownType", func(t *testing.T) { t.Parallel() + // Unknown 
provider types are dropped by the seed step (logged + // and skipped) so one misconfigured row cannot stop the daemon + // from starting. The end state is "no providers", not an error. cfg := codersdk.AIBridgeConfig{ Providers: []codersdk.AIProviderConfig{ {Type: "gemini", Name: "gemini-pro"}, }, } - _, err := BuildProviders(cfg) - require.Error(t, err) - assert.Contains(t, err.Error(), "unknown provider type") + providers, err := buildFromEnv(t, cfg) + require.NoError(t, err) + assert.Empty(t, providers) }) t.Run("CopilotVariants", func(t *testing.T) { @@ -167,22 +195,25 @@ func TestBuildProviders(t *testing.T) { // Copilot API hosts via an explicit BASE_URL. cfg := codersdk.AIBridgeConfig{ Providers: []codersdk.AIProviderConfig{ - {Type: aibridge.ProviderCopilot, Name: aibridge.ProviderCopilot, DumpDir: "/tmp/copilot-dump"}, + {Type: aibridge.ProviderCopilot, Name: aibridge.ProviderCopilot}, {Type: aibridge.ProviderCopilot, Name: agplaibridge.ProviderCopilotBusiness, BaseURL: "https://" + agplaibridge.HostCopilotBusiness}, {Type: aibridge.ProviderCopilot, Name: agplaibridge.ProviderCopilotEnterprise, BaseURL: "https://" + agplaibridge.HostCopilotEnterprise}, }, } - providers, err := BuildProviders(cfg) + providers, err := buildFromEnv(t, cfg) require.NoError(t, err) require.Len(t, providers, 3) - assert.Equal(t, aibridge.ProviderCopilot, providers[0].Name()) - assert.Equal(t, "/tmp/copilot-dump", providers[0].APIDumpDir()) - assert.Equal(t, agplaibridge.ProviderCopilotBusiness, providers[1].Name()) - assert.Equal(t, "https://"+agplaibridge.HostCopilotBusiness, providers[1].BaseURL()) - assert.Equal(t, agplaibridge.ProviderCopilotEnterprise, providers[2].Name()) - assert.Equal(t, "https://"+agplaibridge.HostCopilotEnterprise, providers[2].BaseURL()) + byName := make(map[string]aibridge.Provider, len(providers)) + for _, p := range providers { + byName[p.Name()] = p + } + require.Contains(t, byName, aibridge.ProviderCopilot) + require.Contains(t, byName, 
agplaibridge.ProviderCopilotBusiness) + require.Contains(t, byName, agplaibridge.ProviderCopilotEnterprise) + assert.Equal(t, "https://"+agplaibridge.HostCopilotBusiness, byName[agplaibridge.ProviderCopilotBusiness].BaseURL()) + assert.Equal(t, "https://"+agplaibridge.HostCopilotEnterprise, byName[agplaibridge.ProviderCopilotEnterprise].BaseURL()) }) t.Run("ChatGPTProvider", func(t *testing.T) { @@ -191,17 +222,158 @@ func TestBuildProviders(t *testing.T) { // base URL. Admins configure it as an indexed openai provider. cfg := codersdk.AIBridgeConfig{ Providers: []codersdk.AIProviderConfig{ - {Type: aibridge.ProviderOpenAI, Name: agplaibridge.ProviderChatGPT, BaseURL: agplaibridge.BaseURLChatGPT}, + {Type: aibridge.ProviderOpenAI, Name: agplaibridge.ProviderChatGPT, Keys: []string{"sk-chatgpt"}, BaseURL: agplaibridge.BaseURLChatGPT}, }, } - providers, err := BuildProviders(cfg) + providers, err := buildFromEnv(t, cfg) require.NoError(t, err) require.Len(t, providers, 1) assert.Equal(t, agplaibridge.ProviderChatGPT, providers[0].Name()) assert.Equal(t, agplaibridge.BaseURLChatGPT, providers[0].BaseURL()) }) + + t.Run("NativeAnthropicDefaultBaseURL", func(t *testing.T) { + t.Parallel() + row := database.AIProvider{ + Type: database.AiProviderTypeAnthropic, + Name: aibridge.ProviderAnthropic, + BaseUrl: "https://api.anthropic.com/", + } + assert.Nil(t, bedrockConfigFromRow(row, codersdk.AIProviderSettings{})) + }) + + t.Run("NativeAnthropicCustomBaseURL", func(t *testing.T) { + t.Parallel() + row := database.AIProvider{ + Type: database.AiProviderTypeAnthropic, + Name: "anthropic-proxy", + BaseUrl: "https://internal-proxy.example.com/anthropic/", + } + assert.Nil(t, bedrockConfigFromRow(row, codersdk.AIProviderSettings{})) + }) + + t.Run("BedrockSettingsPresent", func(t *testing.T) { + t.Parallel() + accessKey := "AKID" + secret := "secret" + model := "anthropic.claude-3-5-sonnet-20241022-v2:0" + smallModel := "anthropic.claude-3-5-haiku-20241022-v1:0" + row := 
database.AIProvider{ + Type: database.AiProviderTypeAnthropic, + Name: "anthropic-bedrock", + BaseUrl: "https://bedrock-runtime.us-west-2.amazonaws.com/", + } + settings := codersdk.AIProviderSettings{ + Bedrock: &codersdk.AIProviderBedrockSettings{ + Region: "us-west-2", + AccessKey: &accessKey, + AccessKeySecret: &secret, + Model: model, + SmallFastModel: smallModel, + }, + } + got := bedrockConfigFromRow(row, settings) + require.NotNil(t, got) + assert.Equal(t, row.BaseUrl, got.BaseURL) + assert.Equal(t, "us-west-2", got.Region) + assert.Equal(t, accessKey, got.AccessKey) + assert.Equal(t, secret, got.AccessKeySecret) + assert.Equal(t, model, got.Model) + assert.Equal(t, smallModel, got.SmallFastModel) + }) + + t.Run("BedrockSettingsEmpty", func(t *testing.T) { + t.Parallel() + // A non-nil but zero-valued Bedrock settings blob should not + // produce a Bedrock config; the provider's generic BaseUrl is + // not a Bedrock detection signal. + row := database.AIProvider{ + Type: database.AiProviderTypeAnthropic, + Name: "anthropic-empty-bedrock", + BaseUrl: "https://api.anthropic.com/", + } + settings := codersdk.AIProviderSettings{ + Bedrock: &codersdk.AIProviderBedrockSettings{}, + } + assert.Nil(t, bedrockConfigFromRow(row, settings)) + }) +} + +// TestBuildProvidersSkipsBadRows exercises the skip-and-continue path +// directly: rows whose settings blob is malformed or whose type is not +// supported by the runtime builder are logged and excluded from the +// returned snapshot without surfacing a top-level error. The seed path +// filters most of these out before insert, so we bypass it and insert +// rows straight into the database via dbgen. 
+func TestBuildProvidersSkipsBadRows(t *testing.T) { + t.Parallel() + + t.Run("CorruptSettings", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) + + dbgen.AIProvider(t, db, database.AIProvider{ + Type: database.AiProviderTypeAnthropic, + Name: "anthropic-broken", + BaseUrl: "https://api.anthropic.com/", + Settings: sql.NullString{String: "not-json", Valid: true}, + }) + + providers, err := BuildProviders(ctx, db, codersdk.AIBridgeConfig{}, logger) + require.NoError(t, err) + assert.Empty(t, providers) + }) + + t.Run("UnsupportedType", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) + + // Azure is a valid DB-level provider type but has no runtime + // builder yet; it must hit the default branch and be skipped. + dbgen.AIProvider(t, db, database.AIProvider{ + Type: database.AiProviderTypeAzure, + Name: "azure-openai", + BaseUrl: "https://example.openai.azure.com/", + }) + + providers, err := BuildProviders(ctx, db, codersdk.AIBridgeConfig{}, logger) + require.NoError(t, err) + assert.Empty(t, providers) + }) + + t.Run("BadRowDoesNotBlockGoodRow", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) + + dbgen.AIProvider(t, db, database.AIProvider{ + Type: database.AiProviderTypeAnthropic, + Name: "anthropic-broken", + BaseUrl: "https://api.anthropic.com/", + Settings: sql.NullString{String: "{not valid json", Valid: true}, + }) + good := dbgen.AIProvider(t, db, database.AIProvider{ + Type: database.AiProviderTypeOpenai, + Name: "openai-good", + BaseUrl: "https://api.openai.com/", + }) + dbgen.AIProviderKey(t, db, database.AIProviderKey{ + ProviderID: good.ID, + APIKey: 
"sk-good", + }) + + providers, err := BuildProviders(ctx, db, codersdk.AIBridgeConfig{}, logger) + require.NoError(t, err) + require.Len(t, providers, 1) + assert.Equal(t, "openai-good", providers[0].Name()) + }) } func providerNames(providers []aibridge.Provider) []string { diff --git a/cli/server.go b/cli/server.go index 1b2350d931..02e6b16564 100644 --- a/cli/server.go +++ b/cli/server.go @@ -899,6 +899,32 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd. if err != nil { return xerrors.Errorf("remove secrets from deployment values: %w", err) } + + // AI provider DB initialization runs synchronously here so + // authorized reads complete before any background goroutine + // starts. Otherwise a mid-startup cancellation can interrupt + // them and fail startup. Seeding must also happen before + // newAPI so the aibridgeproxyd in the enterprise closure + // observes env-configured providers. + // + // This is a once-off operation; once completed, all providers + // will be sourced from the database. + if err := coderd.SeedAIProvidersFromEnv( + ctx, + options.Database, + vals.AI.BridgeConfig, + logger.Named("aibridge.envseed"), + ); err != nil { + return xerrors.Errorf("seed ai providers from env: %w", err) + } + var aibridgeProviders []aibridge.Provider + if vals.AI.BridgeConfig.Enabled.Value() { + aibridgeProviders, err = BuildProviders(ctx, options.Database, vals.AI.BridgeConfig, logger.Named("aibridge.providers")) + if err != nil { + return xerrors.Errorf("build AI providers: %w", err) + } + } + telemetryReporter, err := telemetry.New(telemetry.Options{ Disabled: !vals.Telemetry.Enable.Value(), BuiltinPostgres: builtinPostgres, @@ -1006,18 +1032,6 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd. 
notificationReportGenerator := reports.NewReportGenerator(ctx, logger.Named("notifications.report_generator"), options.Database, options.NotificationsEnqueuer, quartz.NewReal()) defer notificationReportGenerator.Close() - // Seed providers before newAPI so the aibridgeproxyd inside - // the enterprise closure observes env-configured providers - // at init. - if err := coderd.SeedAIProvidersFromEnv( - ctx, - options.Database, - vals.AI.BridgeConfig, - logger.Named("aibridge.envseed"), - ); err != nil { - return xerrors.Errorf("seed ai providers from env: %w", err) - } - // We use a separate coderAPICloser so the Enterprise API // can have its own close functions. This is cleaner // than abstracting the Coder API itself. @@ -1034,11 +1048,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd. // unconditionally when the bridge feature is enabled by config so // chatd can use it regardless of license entitlement. if vals.AI.BridgeConfig.Enabled.Value() { - providers, err := BuildProviders(vals.AI.BridgeConfig) - if err != nil { - return xerrors.Errorf("build AI providers: %w", err) - } - aibridgeDaemon, err := newAIBridgeDaemon(coderAPI, providers) + aibridgeDaemon, err := newAIBridgeDaemon(coderAPI, aibridgeProviders) if err != nil { return xerrors.Errorf("create aibridged: %w", err) } @@ -3114,8 +3124,6 @@ func readAIProvidersForPrefix(logger slog.Logger, environ []string, prefix strin } case "BASE_URL": provider.BaseURL = v.Value - case "DUMP_DIR": - provider.DumpDir = v.Value case "BEDROCK_BASE_URL": provider.BedrockBaseURL = v.Value case "BEDROCK_REGION": diff --git a/cli/server_aibridge_internal_test.go b/cli/server_aibridge_internal_test.go index 8afed4c749..1797f1c7ed 100644 --- a/cli/server_aibridge_internal_test.go +++ b/cli/server_aibridge_internal_test.go @@ -10,8 +10,10 @@ import ( "cdr.dev/slog/v3" "cdr.dev/slog/v3/sloggers/slogtest" "github.com/coder/coder/v2/aibridge" + "github.com/coder/coder/v2/coderd/database" 
"github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/testutil" + "github.com/coder/serpent" ) func TestReadAIProvidersFromEnv(t *testing.T) { @@ -34,7 +36,6 @@ func TestReadAIProvidersFromEnv(t *testing.T) { "CODER_AIBRIDGE_PROVIDER_0_NAME=anthropic-zdr", "CODER_AIBRIDGE_PROVIDER_0_KEY=sk-ant-xxx", "CODER_AIBRIDGE_PROVIDER_0_BASE_URL=https://api.anthropic.com/", - "CODER_AIBRIDGE_PROVIDER_0_DUMP_DIR=/tmp/aibridge-dump", }, expected: []codersdk.AIProviderConfig{ { @@ -42,7 +43,6 @@ func TestReadAIProvidersFromEnv(t *testing.T) { Name: "anthropic-zdr", Keys: []string{"sk-ant-xxx"}, BaseURL: "https://api.anthropic.com/", - DumpDir: "/tmp/aibridge-dump", }, }, }, @@ -537,3 +537,52 @@ func TestValidateLegacyAIBridgeConfig(t *testing.T) { }) } } + +func TestBuildAIProviderFromRowSetsAPIDumpDir(t *testing.T) { + t.Parallel() + + const dumpDir = "/tmp/coder-aibridge-dumps" + + tests := []struct { + name string + row database.AIProvider + }{ + { + name: "OpenAI", + row: database.AIProvider{ + Type: database.AiProviderTypeOpenai, + Name: "openai", + BaseUrl: "https://api.openai.com/", + }, + }, + { + name: "Anthropic", + row: database.AIProvider{ + Type: database.AiProviderTypeAnthropic, + Name: "anthropic", + BaseUrl: "https://api.anthropic.com/", + }, + }, + { + name: "Copilot", + row: database.AIProvider{ + Type: database.AiProviderTypeCopilot, + Name: "copilot", + BaseUrl: "https://api.githubcopilot.com/", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + provider, err := buildAIProviderFromRow(tt.row, nil, codersdk.AIBridgeConfig{ + AllowBYOK: serpent.Bool(true), + APIDumpDir: serpent.String(dumpDir), + }) + require.NoError(t, err) + assert.Equal(t, dumpDir, provider.APIDumpDir()) + }) + } +} diff --git a/cli/testdata/coder_server_--help.golden b/cli/testdata/coder_server_--help.golden index 32225cedc7..4bcf9efda9 100644 --- a/cli/testdata/coder_server_--help.golden +++ 
b/cli/testdata/coder_server_--help.golden @@ -113,6 +113,12 @@ AI GATEWAY OPTIONS: with AI budgets. "highest" selects the group with the largest spend limit, and is currently the only supported value. + --ai-gateway-dump-dir string, $CODER_AI_GATEWAY_DUMP_DIR + Base directory for dumping AI Bridge request/response pairs to disk + for debugging. When set, each provider writes under a subdirectory + named after the provider. Sensitive headers are redacted. Leave empty + to disable. + --ai-gateway-allow-byok bool, $CODER_AI_GATEWAY_ALLOW_BYOK (default: true) Allow users to provide their own LLM API keys or subscriptions. When disabled, only centralized key authentication is permitted. diff --git a/cli/testdata/server-config.yaml.golden b/cli/testdata/server-config.yaml.golden index c9f6725210..3e73b7ca3b 100644 --- a/cli/testdata/server-config.yaml.golden +++ b/cli/testdata/server-config.yaml.golden @@ -920,6 +920,11 @@ ai_gateway: # X-Ai-Bridge-Actor-Metadata-Username (their username). # (default: false, type: bool) send_actor_headers: false + # Base directory for dumping AI Bridge request/response pairs to disk for + # debugging. When set, each provider writes under a subdirectory named after the + # provider. Sensitive headers are redacted. Leave empty to disable. + # (default: , type: string) + api_dump_dir: "" # Allow users to provide their own LLM API keys or subscriptions. When disabled, # only centralized key authentication is permitted. 
# (default: true, type: bool) diff --git a/coderd/ai_providers_migrate.go b/coderd/ai_providers_migrate.go index c54cd12e28..98cfba2226 100644 --- a/coderd/ai_providers_migrate.go +++ b/coderd/ai_providers_migrate.go @@ -292,6 +292,11 @@ func providersFromEnv(ctx context.Context, cfg codersdk.AIBridgeConfig, logger s Type: database.AiProviderTypeAnthropic, } if hasLegacyBedrock { + if hasAnthropicKey { + logger.Warn(ctx, "ignoring legacy Anthropic API key because Bedrock credentials are configured; Bedrock authenticates via access keys or credential chain", + slog.F("provider", aibridge.ProviderAnthropic), + ) + } // Bedrock-only deployments use CODER_AIBRIDGE_BEDROCK_BASE_URL // for custom VPC, FIPS, or proxy endpoints. dp.BaseURL = cfg.LegacyBedrock.BaseURL.String() diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index dce2daec4c..ae5715bd78 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -14450,6 +14450,10 @@ const docTemplate = `{ } ] }, + "api_dump_dir": { + "description": "APIDumpDir is the base directory under which each provider's\nrequest/response dumps are written, in a subdirectory named after\nthe provider. 
Empty disables dumping.", + "type": "string" + }, "bedrock": { "description": "Deprecated: Use Providers with indexed CODER_AI_GATEWAY_PROVIDER_\u003cN\u003e_* env vars instead.", "allOf": [ @@ -15062,10 +15066,6 @@ const docTemplate = `{ "bedrock_small_fast_model": { "type": "string" }, - "dump_dir": { - "description": "DumpDir is the directory path for dumping API requests and responses.", - "type": "string" - }, "name": { "description": "Name is the unique instance identifier used for routing.\nDefaults to Type if not provided.", "type": "string" diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 88839dc5ac..21ee879158 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -12854,6 +12854,10 @@ } ] }, + "api_dump_dir": { + "description": "APIDumpDir is the base directory under which each provider's\nrequest/response dumps are written, in a subdirectory named after\nthe provider. Empty disables dumping.", + "type": "string" + }, "bedrock": { "description": "Deprecated: Use Providers with indexed CODER_AI_GATEWAY_PROVIDER_\u003cN\u003e_* env vars instead.", "allOf": [ @@ -13466,10 +13470,6 @@ "bedrock_small_fast_model": { "type": "string" }, - "dump_dir": { - "description": "DumpDir is the directory path for dumping API requests and responses.", - "type": "string" - }, "name": { "description": "Name is the unique instance identifier used for routing.\nDefaults to Type if not provided.", "type": "string" diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index 086a35d229..74aa3f9f46 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -627,6 +627,7 @@ var ( rbac.ResourceAibridgeInterception.Type: {policy.ActionCreate, policy.ActionRead, policy.ActionUpdate, policy.ActionDelete}, rbac.ResourceAiModelPrice.Type: {policy.ActionUpdate}, // Required for the startup price seeder. 
rbac.ResourceAiSeat.Type: {policy.ActionCreate}, // Required for UpsertAISeatState. + rbac.ResourceAIProvider.Type: {policy.ActionRead}, // Required to load the provider snapshot (and per-provider keys) at startup. }), User: []rbac.Permission{}, ByOrgID: map[string]rbac.OrgPermissions{}, diff --git a/codersdk/deployment.go b/codersdk/deployment.go index 97dcd6e27d..dab861590f 100644 --- a/codersdk/deployment.go +++ b/codersdk/deployment.go @@ -1863,6 +1863,16 @@ func (c *DeploymentValues) Options() serpent.OptionSet { Group: &deploymentGroupAIGateway, YAML: "structured_logging", } + aiGatewayAPIDumpDir := serpent.Option{ + Name: "AI Gateway API Dump Directory", + Description: "Base directory for dumping AI Bridge request/response pairs to disk for debugging. When set, each provider writes under a subdirectory named after the provider. Sensitive headers are redacted. Leave empty to disable.", + Flag: "ai-gateway-dump-dir", + Env: "CODER_AI_GATEWAY_DUMP_DIR", + Value: &c.AI.BridgeConfig.APIDumpDir, + Default: "", + Group: &deploymentGroupAIGateway, + YAML: "api_dump_dir", + } aiGatewaySendActorHeaders := serpent.Option{ Name: "AI Gateway Send Actor Headers", Description: "Once enabled, extra headers will be added to upstream requests to identify the user (actor) making requests to AI Gateway. " + @@ -4275,6 +4285,7 @@ Write out the current server config as YAML to stdout.`, UseInstead: serpent.OptionSet{aiGatewaySendActorHeaders}, }, aiGatewaySendActorHeaders, + aiGatewayAPIDumpDir, { Name: "AI Bridge Allow BYOK", Description: "Deprecated: use --ai-gateway-allow-byok or CODER_AI_GATEWAY_ALLOW_BYOK instead. Allow users to provide their own LLM API keys or subscriptions. 
When disabled, only centralized key authentication is permitted.", @@ -4632,6 +4643,10 @@ type AIBridgeConfig struct { CircuitBreakerInterval serpent.Duration `json:"circuit_breaker_interval" typescript:",notnull"` CircuitBreakerTimeout serpent.Duration `json:"circuit_breaker_timeout" typescript:",notnull"` CircuitBreakerMaxRequests serpent.Int64 `json:"circuit_breaker_max_requests" typescript:",notnull"` + // APIDumpDir is the base directory under which each provider's + // request/response dumps are written, in a subdirectory named after + // the provider. Empty disables dumping. + APIDumpDir serpent.String `json:"api_dump_dir" typescript:",notnull"` } type AIBridgeOpenAIConfig struct { @@ -4669,8 +4684,6 @@ type AIProviderConfig struct { Keys []string `json:"-"` // BaseURL is the base URL of the upstream provider API. BaseURL string `json:"base_url"` - // DumpDir is the directory path for dumping API requests and responses. - DumpDir string `json:"dump_dir,omitempty"` // Bedrock fields (only applicable when Type == "anthropic"). BedrockBaseURL string `json:"-"` diff --git a/docs/ai-coder/ai-gateway/setup.md b/docs/ai-coder/ai-gateway/setup.md index dd95afdaac..27aed7c050 100644 --- a/docs/ai-coder/ai-gateway/setup.md +++ b/docs/ai-coder/ai-gateway/setup.md @@ -218,20 +218,12 @@ requests to `/api/v2/aibridge//` to target a specific instance: **Supported keys per provider:** -| Key | Required | Description | -|------------|----------|-------------------------------------------------------| -| `TYPE` | Yes | Provider type: `openai`, `anthropic`, or `copilot` | -| `NAME` | No | Unique instance name for routing. Defaults to `TYPE` | -| `KEY` | No | API key for upstream authentication (alias: `KEYS`) | -| `BASE_URL` | No | Base URL of the upstream API | -| `DUMP_DIR` | No | Directory for provider API request and response dumps | - -> [!WARNING] -> `DUMP_DIR` is not intended for regular use. Setting this option -> results in a high number of writes. 
Dump files contain raw request and -> response data, which may include proprietary or sensitive information -> (prompts, completions, tool inputs). Enable only briefly for diagnostic -> purposes and protect the target directory. +| Key | Required | Description | +|------------|----------|------------------------------------------------------| +| `TYPE` | Yes | Provider type: `openai`, `anthropic`, or `copilot` | +| `NAME` | No | Unique instance name for routing. Defaults to `TYPE` | +| `KEY` | No | API key for upstream authentication (alias: `KEYS`) | +| `BASE_URL` | No | Base URL of the upstream API | For `anthropic` providers using AWS Bedrock, the following keys are also available: `BEDROCK_BASE_URL`, `BEDROCK_REGION`, @@ -251,6 +243,39 @@ available: `BEDROCK_BASE_URL`, `BEDROCK_REGION`, > will produce a startup error. Remove one or the other to resolve the > conflict. +## API Dumps + +AI Gateway can dump provider request and response pairs to disk for debugging. +Configure the dump directory with `--ai-gateway-dump-dir` or +`CODER_AI_GATEWAY_DUMP_DIR`: + +```sh +coder server --ai-gateway-dump-dir=/var/lib/coder/ai-gateway-dumps +``` + +Or in YAML: + +```yaml +ai_gateway: + api_dump_dir: /var/lib/coder/ai-gateway-dumps +``` + +This top-level setting replaces the previous per-provider `DUMP_DIR` field. +For each provider, AI Gateway writes dumps under `<dump-dir>/<provider-name>`, where +`<dump-dir>` is the configured dump directory and `<provider-name>` is the provider +instance name used in the route. For example, a provider named `anthropic-corp` +with `/var/lib/coder/ai-gateway-dumps` configured writes to +`/var/lib/coder/ai-gateway-dumps/anthropic-corp`. + +Sensitive headers are redacted before dumps are written. Leave the value empty +to disable dumping. + +> [!WARNING] +> API dumps are intended for short diagnostic sessions only. Dump files contain +> raw request and response data, which may include proprietary or sensitive +> information such as prompts, completions, and tool inputs.
Protect the target +> directory and disable dumping when diagnostics are complete. + ## Data Retention AI Gateway records prompts, token usage, tool invocations, and model reasoning for auditing and diff --git a/docs/reference/api/general.md b/docs/reference/api/general.md index 57e564aee3..02dbfe4135 100644 --- a/docs/reference/api/general.md +++ b/docs/reference/api/general.md @@ -185,6 +185,7 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \ "base_url": "string", "key": "string" }, + "api_dump_dir": "string", "bedrock": { "access_key": "string", "access_key_secret": "string", @@ -213,7 +214,6 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \ "bedrock_model": "string", "bedrock_region": "string", "bedrock_small_fast_model": "string", - "dump_dir": "string", "name": "string", "type": "string" } diff --git a/docs/reference/api/schemas.md b/docs/reference/api/schemas.md index c2cffd13d4..aaee58512c 100644 --- a/docs/reference/api/schemas.md +++ b/docs/reference/api/schemas.md @@ -442,6 +442,7 @@ "base_url": "string", "key": "string" }, + "api_dump_dir": "string", "bedrock": { "access_key": "string", "access_key_secret": "string", @@ -470,7 +471,6 @@ "bedrock_model": "string", "bedrock_region": "string", "bedrock_small_fast_model": "string", - "dump_dir": "string", "name": "string", "type": "string" } @@ -488,6 +488,7 @@ |-------------------------------------|----------------------------------------------------------------------|----------|--------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `allow_byok` | boolean | false | | | | `anthropic` | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false | | Deprecated: Use Providers with indexed CODER_AI_GATEWAY_PROVIDER_<N>_* env vars instead. 
| +| `api_dump_dir` | string | false | | Api dump dir is the base directory under which each provider's request/response dumps are written, in a subdirectory named after the provider. Empty disables dumping. | | `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | Deprecated: Use Providers with indexed CODER_AI_GATEWAY_PROVIDER_<N>_* env vars instead. | | `budget_period` | string | false | | | | `budget_policy` | string | false | | Budget settings for AI Governance cost controls. | @@ -1245,6 +1246,7 @@ "base_url": "string", "key": "string" }, + "api_dump_dir": "string", "bedrock": { "access_key": "string", "access_key_secret": "string", @@ -1273,7 +1275,6 @@ "bedrock_model": "string", "bedrock_region": "string", "bedrock_small_fast_model": "string", - "dump_dir": "string", "name": "string", "type": "string" } @@ -1344,7 +1345,6 @@ "bedrock_model": "string", "bedrock_region": "string", "bedrock_small_fast_model": "string", - "dump_dir": "string", "name": "string", "type": "string" } @@ -1358,7 +1358,6 @@ | `bedrock_model` | string | false | | | | `bedrock_region` | string | false | | | | `bedrock_small_fast_model` | string | false | | | -| `dump_dir` | string | false | | Dump dir is the directory path for dumping API requests and responses. | | `name` | string | false | | Name is the unique instance identifier used for routing. Defaults to Type if not provided. | | `type` | string | false | | Type is the provider type: "openai", "anthropic", or "copilot". | @@ -5706,6 +5705,7 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o "base_url": "string", "key": "string" }, + "api_dump_dir": "string", "bedrock": { "access_key": "string", "access_key_secret": "string", @@ -5734,7 +5734,6 @@ CreateWorkspaceRequest provides options for creating a new workspace. 
Only one o "bedrock_model": "string", "bedrock_region": "string", "bedrock_small_fast_model": "string", - "dump_dir": "string", "name": "string", "type": "string" } @@ -6305,6 +6304,7 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o "base_url": "string", "key": "string" }, + "api_dump_dir": "string", "bedrock": { "access_key": "string", "access_key_secret": "string", @@ -6333,7 +6333,6 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o "bedrock_model": "string", "bedrock_region": "string", "bedrock_small_fast_model": "string", - "dump_dir": "string", "name": "string", "type": "string" } diff --git a/docs/reference/cli/server.md b/docs/reference/cli/server.md index 79c63a5c9c..de694faa79 100644 --- a/docs/reference/cli/server.md +++ b/docs/reference/cli/server.md @@ -1889,6 +1889,16 @@ Emit structured logs for AI Gateway interception records. Use this for exporting Once enabled, extra headers will be added to upstream requests to identify the user (actor) making requests to AI Gateway. This is only needed if you are using a proxy between AI Gateway and an upstream AI provider. This will send X-Ai-Bridge-Actor-Id (the ID of the user making the request) and X-Ai-Bridge-Actor-Metadata-Username (their username). +### --ai-gateway-dump-dir + +| | | +|-------------|-----------------------------------------| +| Type | string | +| Environment | $CODER_AI_GATEWAY_DUMP_DIR | +| YAML | ai_gateway.api_dump_dir | + +Base directory for dumping AI Bridge request/response pairs to disk for debugging. When set, each provider writes under a subdirectory named after the provider. Sensitive headers are redacted. Leave empty to disable. 
+ ### --ai-gateway-allow-byok | | | diff --git a/enterprise/cli/aibridgeproxyd_internal_test.go b/enterprise/cli/aibridgeproxyd_internal_test.go index f95d3414fd..f7b25a549c 100644 --- a/enterprise/cli/aibridgeproxyd_internal_test.go +++ b/enterprise/cli/aibridgeproxyd_internal_test.go @@ -6,11 +6,8 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "github.com/coder/coder/v2/aibridge" - agplcli "github.com/coder/coder/v2/cli" - "github.com/coder/coder/v2/codersdk" ) func TestDomainsFromProviders(t *testing.T) { @@ -19,14 +16,11 @@ func TestDomainsFromProviders(t *testing.T) { t.Run("ExtractsHostnames", func(t *testing.T) { t.Parallel() - providers, err := agplcli.BuildProviders(codersdk.AIBridgeConfig{ - Providers: []codersdk.AIProviderConfig{ - {Type: aibridge.ProviderOpenAI, Name: "openai", Keys: []string{"k"}}, - {Type: aibridge.ProviderAnthropic, Name: "anthropic", Keys: []string{"k"}}, - {Type: aibridge.ProviderOpenAI, Name: "custom", Keys: []string{"k"}, BaseURL: "https://custom-llm.example.com:8443/api"}, - }, - }) - require.NoError(t, err) + providers := []aibridge.Provider{ + aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{Name: "openai", BaseURL: "https://api.openai.com/v1/"}), + aibridge.NewAnthropicProvider(aibridge.AnthropicConfig{Name: "anthropic", BaseURL: "https://api.anthropic.com/"}, nil), + aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{Name: "custom", BaseURL: "https://custom-llm.example.com:8443/api"}), + } domains, mapping := domainsFromProviders(providers) @@ -43,13 +37,10 @@ func TestDomainsFromProviders(t *testing.T) { t.Run("DeduplicatesSameHost", func(t *testing.T) { t.Parallel() - providers, err := agplcli.BuildProviders(codersdk.AIBridgeConfig{ - Providers: []codersdk.AIProviderConfig{ - {Type: aibridge.ProviderOpenAI, Name: "first", Keys: []string{"k"}, BaseURL: "https://api.example.com/v1"}, - {Type: aibridge.ProviderOpenAI, Name: "second", Keys: []string{"k"}, BaseURL: 
"https://api.example.com/v2"}, - }, - }) - require.NoError(t, err) + providers := []aibridge.Provider{ + aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{Name: "first", BaseURL: "https://api.example.com/v1"}), + aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{Name: "second", BaseURL: "https://api.example.com/v2"}), + } domains, mapping := domainsFromProviders(providers) @@ -68,12 +59,9 @@ func TestDomainsFromProviders(t *testing.T) { t.Run("CaseInsensitive", func(t *testing.T) { t.Parallel() - providers, err := agplcli.BuildProviders(codersdk.AIBridgeConfig{ - Providers: []codersdk.AIProviderConfig{ - {Type: aibridge.ProviderOpenAI, Name: "provider", Keys: []string{"k"}, BaseURL: "https://API.Example.COM/v1"}, - }, - }) - require.NoError(t, err) + providers := []aibridge.Provider{ + aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{Name: "provider", BaseURL: "https://API.Example.COM/v1"}), + } domains, mapping := domainsFromProviders(providers) diff --git a/enterprise/cli/server.go b/enterprise/cli/server.go index 0ffc730a37..adba4f9218 100644 --- a/enterprise/cli/server.go +++ b/enterprise/cli/server.go @@ -167,7 +167,7 @@ func (r *RootCmd) Server(_ func()) *serpent.Command { // in-memory roundtripper regardless of license); only the proxy // daemon remains enterprise-gated by config. 
if options.DeploymentValues.AI.BridgeProxyConfig.Enabled.Value() { - providers, err := agplcli.BuildProviders(options.DeploymentValues.AI.BridgeConfig) + providers, err := agplcli.BuildProviders(ctx, options.Database, options.DeploymentValues.AI.BridgeConfig, options.Logger.Named("aibridge.providers")) if err != nil { return nil, nil, xerrors.Errorf("build AI providers: %w", err) } diff --git a/enterprise/cli/testdata/coder_server_--help.golden b/enterprise/cli/testdata/coder_server_--help.golden index 1eab828120..a9062a426f 100644 --- a/enterprise/cli/testdata/coder_server_--help.golden +++ b/enterprise/cli/testdata/coder_server_--help.golden @@ -114,6 +114,12 @@ AI GATEWAY OPTIONS: with AI budgets. "highest" selects the group with the largest spend limit, and is currently the only supported value. + --ai-gateway-dump-dir string, $CODER_AI_GATEWAY_DUMP_DIR + Base directory for dumping AI Bridge request/response pairs to disk + for debugging. When set, each provider writes under a subdirectory + named after the provider. Sensitive headers are redacted. Leave empty + to disable. + --ai-gateway-allow-byok bool, $CODER_AI_GATEWAY_ALLOW_BYOK (default: true) Allow users to provide their own LLM API keys or subscriptions. When disabled, only centralized key authentication is permitted. diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index af46758f9f..29a28130a1 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -82,6 +82,12 @@ export interface AIBridgeConfig { readonly circuit_breaker_interval: number; readonly circuit_breaker_timeout: number; readonly circuit_breaker_max_requests: number; + /** + * APIDumpDir is the base directory under which each provider's + * request/response dumps are written, in a subdirectory named after + * the provider. Empty disables dumping. 
+ */ + readonly api_dump_dir: string; } // From codersdk/aibridge.go @@ -384,10 +390,6 @@ export interface AIProviderConfig { * BaseURL is the base URL of the upstream provider API. */ readonly base_url: string; - /** - * DumpDir is the directory path for dumping API requests and responses. - */ - readonly dump_dir?: string; readonly bedrock_region?: string; readonly bedrock_model?: string; readonly bedrock_small_fast_model?: string;