From 9341efec9f96efd6163c36a86fc9a0c320bbcc7e Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 22 May 2026 08:37:27 +0200 Subject: [PATCH] feat!: seed ai_providers from env on server startup (#24895) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _Disclaimer: implemented by a Coder Agent using Claude Opus 4.7_ Part of the implementation of [RFC: Common AI Provider Configs](https://www.notion.so/coderhq/RFC-Common-AI-Provider-Configs-34bd579be59280ed958feffb82024797) (AIGOV-201). ## Note This change can cause a previously working installation to fail to start should a conflict exist between the providers configured in the environment & those now migrated to the database. I'll raise a PR upstack to document this process and workarounds should a startup fail. ## What this PR does Reconciles environment-derived AI provider configuration with the `ai_providers` table at server startup. The seed runs **before** the aibridged daemon is initialized, so the runtime always reads providers from the database; the legacy `CODER_AIBRIDGE_*` environment variables become a one-shot migration source. ### Behavior - Concurrent server starts are serialized through a Postgres advisory lock (`LockIDAIProvidersEnvSeed`). - Missing rows are inserted with an audit entry attributed to the system actor. - Existing rows whose canonical hash matches the env-derived hash are left alone (the common no-op restart path). - Existing rows whose canonical hash does **not** match cause server startup to fail with a descriptive error so the operator can explicitly resolve the conflict in either env or DB. - Soft-deleted rows are NOT resurrected from env; an explicit operator deletion is sticky across restarts. - Indexed providers whose name conflicts with a legacy env var fail startup with a clear remediation message. - Unknown provider types (e.g. `copilot`, until the DB enum is widened) are skipped with a log entry rather than failing startup. ### Canonical hashing The `canonicalAIProvider` shape captures exactly the fields that determine runtime behavior — `type`, `base_url`, and the Bedrock subset of settings (access key, access key secret, region, model, small fast model) — and is hashed with SHA-256. The hash is **computed on demand from the row + env**, never persisted, so the database does not need a new column for it. API keys live in the separate `ai_provider_keys` table and are intentionally excluded from the hash so operators can rotate keys via the API without forcing a server restart.
Decision log - The hash is intentionally not persisted in the database. The RFC discussed this trade-off; computing on demand keeps the schema minimal and lets the canonical shape evolve without a migration. - The lock uses an `iota` slot in `coderd/database/lock.go` rather than `GenLockID` so it's stable, easy to audit, and matches the convention used for every other startup lock. - A bearer-token Anthropic provider whose env vars also set Bedrock metadata but no AWS credentials does NOT store the Bedrock fields. Without credentials the discriminated settings would misrepresent the row as Bedrock auth. - We deliberately do NOT publish to the `ai_providers_changed` pubsub channel from the seed because the seed completes before any subscriber is started; the follow-up PR introduces that channel.
--- cli/server.go | 66 +++- cli/server_aibridge_internal_test.go | 102 +++++- coderd/ai_providers_migrate.go | 406 +++++++++++++++++++++ coderd/ai_providers_migrate_test.go | 526 +++++++++++++++++++++++++++ coderd/database/dbauthz/dbauthz.go | 1 + coderd/database/lock.go | 1 + codersdk/aiproviders_bedrock.go | 56 +++ enterprise/cli/aibridged.go | 26 +- enterprise/cli/server.go | 4 +- 9 files changed, 1170 insertions(+), 18 deletions(-) create mode 100644 coderd/ai_providers_migrate.go create mode 100644 coderd/ai_providers_migrate_test.go diff --git a/cli/server.go b/cli/server.go index aac58710a7..e7c6f3cde7 100644 --- a/cli/server.go +++ b/cli/server.go @@ -862,6 +862,10 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd. } vals.AI.BridgeConfig.Providers = append(vals.AI.BridgeConfig.Providers, aiProviders...) + if err := validateLegacyAIBridgeConfig(vals.AI.BridgeConfig); err != nil { + return xerrors.Errorf("validate legacy AI bridge config: %w", err) + } + // Manage push notifications. webpusher, err := webpush.New(ctx, ptr.Ref(options.Logger.Named("webpush")), options.Database, options.AccessURL.String()) if err != nil { @@ -1010,6 +1014,18 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd. return xerrors.Errorf("create coder API: %w", err) } + // Runs unconditionally so operators can seed providers via + // env without enabling the bridge or proxy features. + if err := coderd.SeedAIProvidersFromEnv( + ctx, + options.Database, + vals.AI.BridgeConfig, + options.Auditor, + logger.Named("aibridge.envseed"), + ); err != nil { + return xerrors.Errorf("seed ai providers from env: %w", err) + } + if vals.Prometheus.Enable { // Agent metrics require reference to the tailnet coordinator, so must be initiated after Coder API. closeAgentsFunc, err := prometheusmetrics.Agents(ctx, logger, options.PrometheusRegistry, coderAPI.Database, &coderAPI.TailnetCoordinator, coderAPI.DERPMap, coderAPI.Options.AgentInactiveDisconnectTimeout, 0) @@ -2961,7 +2977,20 @@ func ReadAIProvidersFromEnv(logger slog.Logger, environ []string) ([]codersdk.AI i, p.Type, aibridge.ProviderOpenAI, aibridge.ProviderAnthropic, aibridge.ProviderCopilot) } - if p.Type != aibridge.ProviderAnthropic && hasBedrockFields(*p) { + var bedrockKey, bedrockSecret string + if len(p.BedrockAccessKeys) > 0 { + bedrockKey = p.BedrockAccessKeys[0] + } + if len(p.BedrockAccessKeySecrets) > 0 { + bedrockSecret = p.BedrockAccessKeySecrets[0] + } + settings := codersdk.NewAIProviderBedrockSettings( + p.BedrockRegion, bedrockKey, bedrockSecret, + p.BedrockModel, p.BedrockSmallFastModel, + ) + isBedrock := codersdk.IsBedrockConfigured(p.BedrockBaseURL, settings) + + if p.Type != aibridge.ProviderAnthropic && isBedrock { return nil, xerrors.Errorf("provider %d (%s): BEDROCK_* fields are only supported with TYPE %q", i, p.Type, aibridge.ProviderAnthropic) } @@ -2971,6 +3000,15 @@ func ReadAIProvidersFromEnv(logger slog.Logger, environ []string) ([]codersdk.AI i, p.Type, aibridge.ProviderCopilot) } + // An Anthropic provider authenticates either via a bearer + // token (KEYS) or via Bedrock (BEDROCK_*), not both. Surface + // the conflict here so misconfigured deployments fail before + // any DB work happens at server startup. + if p.Type == aibridge.ProviderAnthropic && len(p.Keys) > 0 && isBedrock { + return nil, xerrors.Errorf("provider %d (%s): KEY/KEYS and BEDROCK_* fields are mutually exclusive", + i, p.Type) + } + if err := validateProviderCredentialList(i, p.Type, p.Keys); err != nil { return nil, err } @@ -3092,10 +3130,28 @@ func readAIProvidersForPrefix(logger slog.Logger, environ []string, prefix strin return providers, nil } -func hasBedrockFields(p codersdk.AIProviderConfig) bool { - return p.BedrockBaseURL != "" || p.BedrockRegion != "" || - len(p.BedrockAccessKeys) > 0 || len(p.BedrockAccessKeySecrets) > 0 || - p.BedrockModel != "" || p.BedrockSmallFastModel != "" +// validateLegacyAIBridgeConfig enforces invariants on the legacy +// single-provider env vars (CODER_AIBRIDGE_ANTHROPIC_KEY, +// CODER_AIBRIDGE_BEDROCK_*) that the indexed validator above can't +// catch because legacy fields live outside cfg.Providers. +func validateLegacyAIBridgeConfig(cfg codersdk.AIBridgeConfig) error { + // An Anthropic provider authenticates either via a bearer token + // or via Bedrock, not both. Fields without serpent-level + // defaults (region, base URL, credentials) reliably indicate + // operator intent; Model and SmallFastModel are excluded because + // they have defaults. + settings := codersdk.NewAIProviderBedrockSettings( + cfg.LegacyBedrock.Region.String(), + cfg.LegacyBedrock.AccessKey.String(), + cfg.LegacyBedrock.AccessKeySecret.String(), + cfg.LegacyBedrock.Model.String(), + cfg.LegacyBedrock.SmallFastModel.String(), + ) + hasBedrock := codersdk.IsBedrockConfigured(cfg.LegacyBedrock.BaseURL.String(), settings) + if cfg.LegacyAnthropic.Key.String() != "" && hasBedrock { + return xerrors.New("CODER_AIBRIDGE_ANTHROPIC_KEY and CODER_AIBRIDGE_BEDROCK_* are mutually exclusive") + } + return nil } // maxKeysPerProvider is the maximum number of keys allowed per diff --git a/cli/server_aibridge_internal_test.go b/cli/server_aibridge_internal_test.go index 6e9d69469d..8afed4c749 100644 --- a/cli/server_aibridge_internal_test.go +++ b/cli/server_aibridge_internal_test.go @@ -194,12 +194,26 @@ func TestReadAIProvidersFromEnv(t *testing.T) { }, }, { - // KEYS, BEDROCK_ACCESS_KEYS, and BEDROCK_ACCESS_KEY_SECRETS - // are plural aliases for their singular counterparts. - name: "PluralKeyAliases", + // KEYS is a plural alias for KEY. + name: "PluralKeysAlias", env: []string{ "CODER_AIBRIDGE_PROVIDER_0_TYPE=anthropic", "CODER_AIBRIDGE_PROVIDER_0_KEYS=sk-ant-xxx", + }, + expected: []codersdk.AIProviderConfig{ + { + Type: aibridge.ProviderAnthropic, + Name: aibridge.ProviderAnthropic, + Keys: []string{"sk-ant-xxx"}, + }, + }, + }, + { + // BEDROCK_ACCESS_KEYS and BEDROCK_ACCESS_KEY_SECRETS are + // plural aliases for their singular counterparts. + name: "PluralBedrockAliases", + env: []string{ + "CODER_AIBRIDGE_PROVIDER_0_TYPE=anthropic", "CODER_AIBRIDGE_PROVIDER_0_BEDROCK_ACCESS_KEYS=AKID", "CODER_AIBRIDGE_PROVIDER_0_BEDROCK_ACCESS_KEY_SECRETS=secret", }, @@ -207,12 +221,23 @@ func TestReadAIProvidersFromEnv(t *testing.T) { { Type: aibridge.ProviderAnthropic, Name: aibridge.ProviderAnthropic, - Keys: []string{"sk-ant-xxx"}, BedrockAccessKeys: []string{"AKID"}, BedrockAccessKeySecrets: []string{"secret"}, }, }, }, + { + // An Anthropic provider can't use both a bearer token + // (KEYS) and Bedrock (BEDROCK_*); they're mutually + // exclusive authentication modes. + name: "AnthropicKeysAndBedrockConflict", + env: []string{ + "CODER_AIBRIDGE_PROVIDER_0_TYPE=anthropic", + "CODER_AIBRIDGE_PROVIDER_0_KEYS=sk-ant-xxx", + "CODER_AIBRIDGE_PROVIDER_0_BEDROCK_REGION=us-east-1", + }, + errContains: "KEY/KEYS and BEDROCK_* fields are mutually exclusive", + }, { name: "ConflictKeyAndKeys", env: []string{ @@ -443,3 +468,72 @@ func TestReadAIProvidersFromEnv(t *testing.T) { } }) } + +func TestValidateLegacyAIBridgeConfig(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + cfg codersdk.AIBridgeConfig + errContains string + }{ + { + name: "BareAnthropicKey", + cfg: codersdk.AIBridgeConfig{ + LegacyAnthropic: codersdk.AIBridgeAnthropicConfig{Key: "sk-ant"}, + }, + }, + { + name: "BareBedrockRegion", + cfg: codersdk.AIBridgeConfig{ + LegacyBedrock: codersdk.AIBridgeBedrockConfig{Region: "us-east-1"}, + }, + }, + { + name: "BedrockCredentialsOnly", + cfg: codersdk.AIBridgeConfig{ + LegacyBedrock: codersdk.AIBridgeBedrockConfig{ + AccessKey: "AKIA", + AccessKeySecret: "secret", + }, + }, + }, + { + name: "AnthropicKeyAndBedrockConflict", + cfg: codersdk.AIBridgeConfig{ + LegacyAnthropic: codersdk.AIBridgeAnthropicConfig{Key: "sk-ant"}, + LegacyBedrock: codersdk.AIBridgeBedrockConfig{ + Region: "us-east-1", + AccessKey: "AKIA", + AccessKeySecret: "secret", + }, + }, + errContains: "CODER_AIBRIDGE_ANTHROPIC_KEY and CODER_AIBRIDGE_BEDROCK_* are mutually exclusive", + }, + { + name: "AnthropicKeyWithBedrockModelDefaultsIsFine", + cfg: codersdk.AIBridgeConfig{ + LegacyAnthropic: codersdk.AIBridgeAnthropicConfig{Key: "sk-ant"}, + // Model defaults shouldn't trip the conflict; they're + // always populated in a real deployment. + LegacyBedrock: codersdk.AIBridgeBedrockConfig{ + Model: "anthropic.claude-3-5-sonnet", + SmallFastModel: "anthropic.claude-3-5-haiku", + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + err := validateLegacyAIBridgeConfig(tt.cfg) + if tt.errContains == "" { + require.NoError(t, err) + return + } + require.Error(t, err) + require.Contains(t, err.Error(), tt.errContains) + }) + } +} diff --git a/coderd/ai_providers_migrate.go b/coderd/ai_providers_migrate.go new file mode 100644 index 0000000000..783c062986 --- /dev/null +++ b/coderd/ai_providers_migrate.go @@ -0,0 +1,406 @@ +package coderd + +import ( + "context" + "crypto/sha256" + "database/sql" + "encoding/hex" + "encoding/json" + "maps" + "slices" + + "github.com/google/uuid" + "golang.org/x/xerrors" + + "cdr.dev/slog/v3" + "github.com/coder/coder/v2/aibridge" + aibridgeutils "github.com/coder/coder/v2/aibridge/utils" + "github.com/coder/coder/v2/coderd/audit" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/db2sdk" + "github.com/coder/coder/v2/coderd/database/dbauthz" + "github.com/coder/coder/v2/coderd/database/dbtime" + "github.com/coder/coder/v2/codersdk" +) + +// SeedAIProvidersFromEnv reconciles the deployment's environment- +// derived AI provider configuration with rows in the ai_providers +// table at server startup. Concurrent server starts are serialized via a +// Postgres advisory lock; rows that already exist with a matching +// canonical hash are left alone, missing rows are inserted, and rows +// whose hash differs from the env-derived value cause startup to fail +// with a descriptive error. +// +// API keys derived from env vars are inserted into ai_provider_keys at +// the time the provider row is first created. We do NOT add env-sourced +// keys to a provider that already has keys, because operators may have +// added or rotated keys via the API after the initial seed and we do +// not want to clobber that state on every restart. +// +// Only env-sourced providers participate in the seed; rows created via +// the HTTP CRUD endpoints are not affected. +// +// Audit entries are recorded via the system actor for any inserts. +func SeedAIProvidersFromEnv( + ctx context.Context, + db database.Store, + cfg codersdk.AIBridgeConfig, + auditor audit.Auditor, + logger slog.Logger, +) error { + desired, err := providersFromEnv(ctx, cfg, logger) + if err != nil { + return xerrors.Errorf("compute providers from env: %w", err) + } + if len(desired) == 0 { + return nil + } + + // Audit entries are attributed to the deployment rather than a user. + //nolint:gocritic // server startup, no user actor available + sysCtx := dbauthz.AsSystemRestricted(ctx) + + // Collect inserted rows inside the transaction and emit audit + // entries only after the transaction commits. The auditor writes + // through the outer db handle, so emitting inside InTx would leave + // phantom audit rows if the transaction later rolls back. + var ( + insertedProviders []database.AIProvider + insertedKeys []database.AIProviderKey + ) + + err = db.InTx(func(tx database.Store) error { + insertedProviders = insertedProviders[:0] + insertedKeys = insertedKeys[:0] + + // Acquire the advisory lock. The lock is released when the + // transaction ends. + if err := tx.AcquireLock(sysCtx, database.LockIDAIProvidersEnvSeed); err != nil { + return xerrors.Errorf("acquire ai providers env seed lock: %w", err) + } + + // Load every provider (including soft-deleted and disabled rows) + // once so we can decide insert vs. skip vs. drift per desired + // row without a query per name. + all, err := tx.GetAIProviders(sysCtx, database.GetAIProvidersParams{ + IncludeDeleted: true, + IncludeDisabled: true, + }) + if err != nil { + return xerrors.Errorf("load ai providers: %w", err) + } + // Prefer the live row when a soft-deleted row shares its name. + byName := make(map[string]database.AIProvider, len(all)) + for _, row := range all { + if existing, ok := byName[row.Name]; ok && !existing.Deleted && row.Deleted { + continue + } + byName[row.Name] = row + } + + for _, dp := range desired { + settings, err := encodeAIProviderSettings(codersdk.AIProviderSettings{Bedrock: dp.Bedrock}) + if err != nil { + return xerrors.Errorf("encode settings for %q: %w", dp.Name, err) + } + + existing, found := byName[dp.Name] + switch { + case found && existing.Deleted: + // The provider was created here, then explicitly + // deleted by an operator. We do NOT re-create it + // from env; the operator's deletion is sticky. + logger.Warn(sysCtx, "skipping env-seeded ai provider that was previously soft-deleted", + slog.F("name", dp.Name)) + continue + case found: + existingSettings, err := db2sdk.AIProviderSettings(existing.Settings) + if err != nil { + return xerrors.Errorf("decode existing settings for %q: %w", dp.Name, err) + } + existingDP := desiredAIProvider{ + Type: existing.Type, + BaseURL: existing.BaseUrl, + Bedrock: existingSettings.Bedrock, + } + existingHash := computeProviderHash(existingDP.canonical()) + if existingHash == dp.Hash { + continue + } + return xerrors.Errorf("AI provider %q already exists in the database and differs from the current environment configuration; update the provider through the API or remove the CODER_AIBRIDGE_* env vars to stop seeding it", dp.Name) + } + + row, err := tx.InsertAIProvider(sysCtx, database.InsertAIProviderParams{ + ID: uuid.New(), + Type: dp.Type, + Name: dp.Name, + DisplayName: sql.NullString{String: dp.Name, Valid: true}, + Enabled: true, + BaseUrl: dp.BaseURL, + Settings: settings, + SettingsKeyID: sql.NullString{}, + }) + if err != nil { + return xerrors.Errorf("insert ai provider %q: %w", dp.Name, err) + } + insertedProviders = append(insertedProviders, row) + + // Insert one ai_provider_keys row per env-supplied key. + now := dbtime.Now() + for _, key := range dp.Keys { + if key == "" { + continue + } + keyRow, err := tx.InsertAIProviderKey(sysCtx, database.InsertAIProviderKeyParams{ + ID: uuid.New(), + ProviderID: row.ID, + APIKey: key, + ApiKeyKeyID: sql.NullString{}, + CreatedAt: now, + UpdatedAt: now, + }) + if err != nil { + return xerrors.Errorf("insert ai provider key for %q: %w", dp.Name, err) + } + insertedKeys = append(insertedKeys, keyRow) + } + + logger.Info(sysCtx, "seeded ai provider from environment", + slog.F("name", dp.Name), + slog.F("type", string(dp.Type)), + slog.F("key_count", len(dp.Keys)), + ) + } + return nil + }, nil) + if err != nil { + return err + } + + for _, row := range insertedProviders { + audit.BackgroundAudit(sysCtx, &audit.BackgroundAuditParams[database.AIProvider]{ + Audit: auditor, + Log: logger, + Action: database.AuditActionCreate, + New: row, + }) + } + for _, keyRow := range insertedKeys { + // Mask the plaintext key before it enters the audit pipeline; + // the audit policy on api_key relies on the masked rendering + // so plaintext never reaches a backend. + auditRow := keyRow + auditRow.APIKey = aibridgeutils.MaskSecret(auditRow.APIKey) + audit.BackgroundAudit(sysCtx, &audit.BackgroundAuditParams[database.AIProviderKey]{ + Audit: auditor, + Log: logger, + Action: database.AuditActionCreate, + New: auditRow, + }) + } + return nil +} + +// canonicalAIProvider is the shape we hash to detect drift between the +// configured environment and the row stored in the database. The fields +// we hash are exactly the operator-controllable inputs that affect +// runtime behavior. Credentials are intentionally NOT part of the hash +// so operators can rotate them via the API without forcing a server +// restart. This applies to both bearer API keys (stored in +// ai_provider_keys) and to Bedrock access key/secret pairs (stored in +// the settings blob because Bedrock authenticates via settings rather +// than a bearer token). +// Model and SmallFastModel are excluded: they're tunables, and their +// serpent defaults shift across releases. +type canonicalAIProvider struct { + Type string `json:"type"` + BaseURL string `json:"base_url"` + BedrockRegion string `json:"bedrock_region"` +} + +// desiredAIProvider is a normalized provider description sourced from +// environment configuration that we want to materialize as a row. +type desiredAIProvider struct { + Name string + Type database.AIProviderType + // BaseURL is the upstream provider's HTTP endpoint. + BaseURL string + // Keys is the list of API keys to seed into ai_provider_keys for + // non-Bedrock providers. Bedrock providers have no entries here + // because they authenticate via the encrypted settings blob. + Keys []string + // Bedrock holds the Bedrock-specific settings when the provider + // targets AWS Bedrock; nil otherwise. + Bedrock *codersdk.AIProviderBedrockSettings + Hash string +} + +func (d desiredAIProvider) canonical() canonicalAIProvider { + c := canonicalAIProvider{ + Type: string(d.Type), + BaseURL: d.BaseURL, + } + if d.Bedrock != nil { + c.BedrockRegion = d.Bedrock.Region + } + return c +} + +func computeProviderHash(c canonicalAIProvider) string { + // json.Marshal is deterministic for structs because field order is + // fixed by the struct definition. + b, _ := json.Marshal(c) + sum := sha256.Sum256(b) + return hex.EncodeToString(sum[:]) +} + +// providersFromEnv normalizes the deployment-values AI Bridge config +// (legacy single-provider env vars and indexed CODER_AIBRIDGE_PROVIDER__* +// env vars) into the deduplicated set of providers we want present in +// the database. Conflicts between legacy and indexed providers under +// the same canonical name are surfaced as errors. +func providersFromEnv(ctx context.Context, cfg codersdk.AIBridgeConfig, logger slog.Logger) ([]desiredAIProvider, error) { + out := make(map[string]desiredAIProvider) + legacyNames := make(map[string]bool) + + addLegacy := func(name string, p desiredAIProvider) { + out[name] = p + legacyNames[name] = true + } + + // Legacy OpenAI. + if cfg.LegacyOpenAI.Key.String() != "" { + dp := desiredAIProvider{ + Name: aibridge.ProviderOpenAI, + Type: database.AiProviderTypeOpenai, + BaseURL: cfg.LegacyOpenAI.BaseURL.String(), + Keys: []string{cfg.LegacyOpenAI.Key.String()}, + } + dp.Hash = computeProviderHash(dp.canonical()) + addLegacy(aibridge.ProviderOpenAI, dp) + } + + // Legacy Anthropic + Bedrock. Anthropic is enabled if either an + // Anthropic key OR any Bedrock setting is explicitly configured. + // Detection goes through AIProviderBedrockSettings.IsConfigured() + // so the legacy and indexed paths agree on what counts as a + // Bedrock provider. + bedrock := codersdk.NewAIProviderBedrockSettings( + cfg.LegacyBedrock.Region.String(), + cfg.LegacyBedrock.AccessKey.String(), + cfg.LegacyBedrock.AccessKeySecret.String(), + cfg.LegacyBedrock.Model.String(), + cfg.LegacyBedrock.SmallFastModel.String(), + ) + hasAnthropicKey := cfg.LegacyAnthropic.Key.String() != "" + hasLegacyBedrock := codersdk.IsBedrockConfigured(cfg.LegacyBedrock.BaseURL.String(), bedrock) + if hasAnthropicKey || hasLegacyBedrock { + dp := desiredAIProvider{ + Name: aibridge.ProviderAnthropic, + Type: database.AiProviderTypeAnthropic, + } + if hasLegacyBedrock { + // Bedrock-only deployments use CODER_AIBRIDGE_BEDROCK_BASE_URL + // for custom VPC, FIPS, or proxy endpoints. + dp.BaseURL = cfg.LegacyBedrock.BaseURL.String() + dp.Bedrock = &bedrock + } else { + dp.BaseURL = cfg.LegacyAnthropic.BaseURL.String() + dp.Keys = []string{cfg.LegacyAnthropic.Key.String()} + } + dp.Hash = computeProviderHash(dp.canonical()) + addLegacy(aibridge.ProviderAnthropic, dp) + } + + // Indexed providers. + for _, p := range cfg.Providers { + name := p.Name + if name == "" { + name = p.Type + } + if name == "" { + return nil, xerrors.Errorf("indexed AI provider must have a name or type") + } + // Reject invalid characters here so that bad env values + // fail startup rather than producing a hidden runtime row. + if !codersdk.AIProviderNameRegex.MatchString(name) { + return nil, xerrors.Errorf("invalid AI provider name %q: must match %s", name, codersdk.AIProviderNameRegex) + } + + dp := desiredAIProvider{ + Name: name, + } + switch p.Type { + case aibridge.ProviderOpenAI: + dp.Type = database.AiProviderTypeOpenai + case aibridge.ProviderAnthropic: + dp.Type = database.AiProviderTypeAnthropic + default: + // Skip other types (e.g. copilot) until they are added + // to the database enum. + logger.Warn(ctx, "skipping indexed AI provider with unsupported type", + slog.F("name", name), + slog.F("type", p.Type), + ) + continue + } + + dp.BaseURL = p.BaseURL + // Bedrock fields only apply to Anthropic. Detection goes + // through AIProviderBedrockSettings.IsConfigured() so the + // legacy and indexed paths agree on what counts as a Bedrock + // provider. + isBedrock := false + if dp.Type == database.AiProviderTypeAnthropic { + var accessKey, accessKeySecret string + if len(p.BedrockAccessKeys) > 0 { + accessKey = p.BedrockAccessKeys[0] + } + if len(p.BedrockAccessKeySecrets) > 0 { + accessKeySecret = p.BedrockAccessKeySecrets[0] + } + bedrock := codersdk.NewAIProviderBedrockSettings( + p.BedrockRegion, + accessKey, + accessKeySecret, + p.BedrockModel, + p.BedrockSmallFastModel, + ) + isBedrock = codersdk.IsBedrockConfigured(p.BedrockBaseURL, bedrock) + if isBedrock { + dp.Bedrock = &bedrock + // Always overwrite the generic BaseURL so removing + // BASE_URL later doesn't trigger drift. Empty is fine: + // the runtime derives the endpoint from the region. + dp.BaseURL = p.BedrockBaseURL + } + } + // Non-Bedrock providers carry their bearer keys in + // ai_provider_keys. Bedrock providers authenticate via the + // settings blob and have no keys; cli/server.go rejects + // configs that set both before we get here. + if !isBedrock { + dp.Keys = append(dp.Keys, p.Keys...) + } + + dp.Hash = computeProviderHash(dp.canonical()) + if legacyNames[name] { + return nil, xerrors.Errorf("indexed AI provider %q conflicts with the legacy env var of the same name; remove one or the other", name) + } + if existing, ok := out[name]; ok { + if existing.Hash != dp.Hash { + return nil, xerrors.Errorf("duplicate AI provider name %q with conflicting fields", name) + } + continue + } + out[name] = dp + } + + // Stable order so audit log entries are deterministic across + // restarts, which makes comparison in tests trivial. + res := make([]desiredAIProvider, 0, len(out)) + for _, name := range slices.Sorted(maps.Keys(out)) { + res = append(res, out[name]) + } + return res, nil +} diff --git a/coderd/ai_providers_migrate_test.go b/coderd/ai_providers_migrate_test.go new file mode 100644 index 0000000000..2599b6dffb --- /dev/null +++ b/coderd/ai_providers_migrate_test.go @@ -0,0 +1,526 @@ +package coderd_test + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "cdr.dev/slog/v3" + "cdr.dev/slog/v3/sloggers/slogtest" + "github.com/coder/coder/v2/coderd" + "github.com/coder/coder/v2/coderd/audit" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbtestutil" + "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/testutil" + "github.com/coder/serpent" +) + +func TestSeedAIProvidersFromEnv(t *testing.T) { + t.Parallel() + + t.Run("EmptyConfigNoOp", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + err := coderd.SeedAIProvidersFromEnv(ctx, db, codersdk.AIBridgeConfig{}, auditor, testLogger(t)) + require.NoError(t, err) + require.Empty(t, auditor.AuditLogs()) + }) + + t.Run("LegacyOpenAI", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + cfg := codersdk.AIBridgeConfig{ + LegacyOpenAI: codersdk.AIBridgeOpenAIConfig{ + BaseURL: serpent.String("https://api.openai.com/v1"), + Key: serpent.String("sk-legacy"), + }, + } + err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t)) + require.NoError(t, err) + + // One row exists for "openai". + row, err := db.GetAIProviderByName(ctx, "openai") + require.NoError(t, err) + require.Equal(t, database.AiProviderTypeOpenai, row.Type) + require.Equal(t, "https://api.openai.com/v1", row.BaseUrl) + require.True(t, row.Enabled) + + // One ai_provider_keys row was created with the env key. + keys, err := db.GetAIProviderKeysByProviderID(ctx, row.ID) + require.NoError(t, err) + require.Len(t, keys, 1) + require.Equal(t, "sk-legacy", keys[0].APIKey) + + // The first seed must have emitted audit entries for the new + // provider row and the key row. + require.GreaterOrEqual(t, len(auditor.AuditLogs()), 2) + + // Re-running with the same config is a no-op (no errors, no + // new audit logs because the row matches). + auditor.ResetLogs() + err = coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t)) + require.NoError(t, err) + require.Empty(t, auditor.AuditLogs()) + + // Verify there's still only one row and one key. + all, err := db.GetAIProviders(ctx, database.GetAIProvidersParams{}) + require.NoError(t, err) + require.Len(t, all, 1) + keys, err = db.GetAIProviderKeysByProviderID(ctx, row.ID) + require.NoError(t, err) + require.Len(t, keys, 1) + }) + + t.Run("DriftFailsStartup", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + cfg := codersdk.AIBridgeConfig{ + LegacyOpenAI: codersdk.AIBridgeOpenAIConfig{ + BaseURL: serpent.String("https://api.openai.com/v1"), + Key: serpent.String("sk-original"), + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + // Changing the API key alone does NOT count as drift: keys + // live in a separate table and operators rotate them via the + // API. Only changes to non-credential provider-level fields + // (base_url, type, Bedrock region/model) trip the drift check. + cfg.LegacyOpenAI.Key = serpent.String("sk-rotated") + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + // Changing the base URL is real drift. + cfg.LegacyOpenAI.BaseURL = serpent.String("https://api.openai.com/v2") + err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t)) + require.Error(t, err) + require.Contains(t, err.Error(), "differs from the current environment configuration") + }) + + t.Run("BedrockCredentialRotationIsNotDrift", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + cfg := codersdk.AIBridgeConfig{ + LegacyBedrock: codersdk.AIBridgeBedrockConfig{ + Region: serpent.String("us-east-1"), + AccessKey: serpent.String("AKIA-original"), + AccessKeySecret: serpent.String("secret-original"), + Model: serpent.String("anthropic.claude-3-5-sonnet"), + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + // Rotating the Bedrock access key and secret in env must NOT + // trip the drift check: they're credentials, equivalent to + // bearer API keys, and operators rotate them via the API. + cfg.LegacyBedrock.AccessKey = serpent.String("AKIA-rotated") + cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret-rotated") + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + // Changing the Bedrock region (a non-credential field) is + // real drift. + cfg.LegacyBedrock.Region = serpent.String("us-west-2") + err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t)) + require.Error(t, err) + require.Contains(t, err.Error(), "differs from the current environment configuration") + }) + + t.Run("LegacyBedrockOnlyKeepsBedrockSettings", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + // Bedrock fields without an Anthropic key produce a Bedrock- + // authenticated Anthropic provider with no bearer keys. + cfg := codersdk.AIBridgeConfig{ + LegacyBedrock: codersdk.AIBridgeBedrockConfig{ + Region: serpent.String("us-west-2"), + AccessKey: serpent.String("AKIA"), + AccessKeySecret: serpent.String("secret"), + Model: serpent.String("anthropic.claude-3-5-sonnet"), + SmallFastModel: serpent.String("anthropic.claude-3-5-haiku"), + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + row, err := db.GetAIProviderByName(ctx, "anthropic") + require.NoError(t, err) + require.Equal(t, database.AiProviderTypeAnthropic, row.Type) + require.Contains(t, row.Settings.String, "us-west-2") + require.Contains(t, row.Settings.String, "anthropic.claude-3-5-sonnet") + require.Contains(t, row.Settings.String, "anthropic.claude-3-5-haiku") + require.Contains(t, row.Settings.String, "AKIA") + require.Contains(t, row.Settings.String, "secret") + keys, err := db.GetAIProviderKeysByProviderID(ctx, row.ID) + require.NoError(t, err) + require.Empty(t, keys, "Bedrock provider must not seed bearer keys") + }) + + t.Run("LegacyAnthropicKeyOnlyIgnoresBedrockModelDefaults", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + // LegacyBedrock.Model and LegacyBedrock.SmallFastModel both + // have serpent-level defaults that are always populated in a + // real deployment. Apply those defaults here so the test + // reflects deployment state rather than a hand-crafted config, + // then set only the Anthropic key. The result must be a pure + // bearer-token Anthropic row with no Bedrock settings blob. + dv := codersdk.DeploymentValues{} + opts := dv.Options() + require.NoError(t, opts.SetDefaults()) + // Sanity check: the defaults we rely on are present. + require.NotEmpty(t, dv.AI.BridgeConfig.LegacyBedrock.Model.String()) + require.NotEmpty(t, dv.AI.BridgeConfig.LegacyBedrock.SmallFastModel.String()) + + cfg := dv.AI.BridgeConfig + cfg.LegacyAnthropic.Key = serpent.String("sk-ant-only") + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + row, err := db.GetAIProviderByName(ctx, "anthropic") + require.NoError(t, err) + require.False(t, row.Settings.Valid, "model defaults alone must not produce a Bedrock settings blob") + keys, err := db.GetAIProviderKeysByProviderID(ctx, row.ID) + require.NoError(t, err) + require.Len(t, keys, 1) + require.Equal(t, "sk-ant-only", keys[0].APIKey) + }) + + t.Run("BedrockWithoutCredentialsUsesAWSEnvAuth", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + // Any non-empty Bedrock field signals Bedrock auth. AWS + // credentials are optional because Bedrock can authenticate + // via the AWS environment (instance profile, AWS_PROFILE, etc.). + cfg := codersdk.AIBridgeConfig{ + LegacyBedrock: codersdk.AIBridgeBedrockConfig{ + Region: serpent.String("us-east-1"), + Model: serpent.String("anthropic.claude-3-5-sonnet"), + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + row, err := db.GetAIProviderByName(ctx, "anthropic") + require.NoError(t, err) + require.True(t, row.Settings.Valid, "Bedrock metadata must produce a settings blob") + require.Contains(t, row.Settings.String, "us-east-1") + require.Contains(t, row.Settings.String, "anthropic.claude-3-5-sonnet") + keys, err := db.GetAIProviderKeysByProviderID(ctx, row.ID) + require.NoError(t, err) + require.Empty(t, keys, "Bedrock provider must not seed bearer keys") + }) + + t.Run("BedrockOnlyAnthropic", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + cfg := codersdk.AIBridgeConfig{ + LegacyBedrock: codersdk.AIBridgeBedrockConfig{ + Region: serpent.String("us-east-1"), + AccessKey: serpent.String("AKIAONLY"), + AccessKeySecret: serpent.String("secretonly"), + Model: serpent.String("anthropic.claude-3-5-sonnet"), + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + row, err := db.GetAIProviderByName(ctx, "anthropic") + require.NoError(t, err) + require.Contains(t, row.Settings.String, "us-east-1") + require.Contains(t, row.Settings.String, "AKIAONLY") + require.Contains(t, row.Settings.String, "secretonly") + // Bedrock-only Anthropic has zero ai_provider_keys: it + // authenticates via the settings blob. + keys, err := db.GetAIProviderKeysByProviderID(ctx, row.ID) + require.NoError(t, err) + require.Empty(t, keys) + }) + + t.Run("IndexedProviders", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + cfg := codersdk.AIBridgeConfig{ + Providers: []codersdk.AIProviderConfig{ + { + Type: "openai", + Name: "primary-openai", + BaseURL: "https://api.openai.com/v1", + Keys: []string{"sk-1", "sk-2"}, + }, + { + Type: "anthropic", + Name: "primary-anthropic", + BaseURL: "https://api.anthropic.com/", + Keys: []string{"sk-ant-1"}, + }, + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + oa, err := db.GetAIProviderByName(ctx, "primary-openai") + require.NoError(t, err) + require.Equal(t, database.AiProviderTypeOpenai, oa.Type) + oaKeys, err := db.GetAIProviderKeysByProviderID(ctx, oa.ID) + require.NoError(t, err) + require.Len(t, oaKeys, 2) + gotKeys := []string{oaKeys[0].APIKey, oaKeys[1].APIKey} + require.ElementsMatch(t, []string{"sk-1", "sk-2"}, gotKeys) + + an, err := db.GetAIProviderByName(ctx, "primary-anthropic") + require.NoError(t, err) + require.Equal(t, database.AiProviderTypeAnthropic, an.Type) + // Plain bearer-token Anthropic with no Bedrock fields: no + // settings blob, one bearer key. + require.False(t, an.Settings.Valid, "no settings blob for bearer-token Anthropic") + anKeys, err := db.GetAIProviderKeysByProviderID(ctx, an.ID) + require.NoError(t, err) + require.Len(t, anKeys, 1) + require.Equal(t, "sk-ant-1", anKeys[0].APIKey) + }) + + t.Run("BedrockIndexedProviderHasNoKeys", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + cfg := codersdk.AIBridgeConfig{ + Providers: []codersdk.AIProviderConfig{ + { + Type: "anthropic", + Name: "bedrock-anthropic", + BaseURL: "https://bedrock-runtime.us-east-1.amazonaws.com/", + BedrockRegion: "us-east-1", + BedrockModel: "anthropic.claude-3-5-sonnet", + BedrockAccessKeys: []string{"AKIA-indexed"}, + BedrockAccessKeySecrets: []string{"indexed-secret"}, + }, + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + row, err := db.GetAIProviderByName(ctx, "bedrock-anthropic") + require.NoError(t, err) + require.Contains(t, row.Settings.String, "AKIA-indexed") + require.Contains(t, row.Settings.String, "indexed-secret") + // Crucially, no ai_provider_keys rows for Bedrock providers. + keys, err := db.GetAIProviderKeysByProviderID(ctx, row.ID) + require.NoError(t, err) + require.Empty(t, keys, "Bedrock providers must not seed bearer keys") + }) + + t.Run("LegacyAndIndexedSameNameConflict", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + cfg := codersdk.AIBridgeConfig{ + LegacyOpenAI: codersdk.AIBridgeOpenAIConfig{ + BaseURL: serpent.String("https://api.openai.com/v1"), + Key: serpent.String("sk-legacy"), + }, + Providers: []codersdk.AIProviderConfig{ + { + Type: "openai", + Name: "openai", + BaseURL: "https://api.openai.com/v1", + Keys: []string{"sk-indexed"}, + }, + }, + } + err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t)) + require.Error(t, err) + require.Contains(t, err.Error(), "conflicts") + }) + + t.Run("InvalidProviderName", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + cfg := codersdk.AIBridgeConfig{ + Providers: []codersdk.AIProviderConfig{ + { + Type: "openai", + Name: "Bad_Name", + BaseURL: "https://api.openai.com/v1", + }, + }, + } + err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t)) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid AI provider name") + }) + + t.Run("UnknownProviderTypeIsSkipped", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + cfg := codersdk.AIBridgeConfig{ + Providers: []codersdk.AIProviderConfig{ + { + Type: "copilot", + Name: "gh-copilot", + BaseURL: "https://api.githubcopilot.com/", + }, + { + Type: "openai", + Name: "real-openai", + BaseURL: "https://api.openai.com/v1", + Keys: []string{"sk"}, + }, + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + all, err := db.GetAIProviders(ctx, database.GetAIProvidersParams{}) + require.NoError(t, err) + require.Len(t, all, 1) + require.Equal(t, "real-openai", all[0].Name) + }) + + t.Run("SoftDeletedRowIsNotResurrected", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + cfg := codersdk.AIBridgeConfig{ + LegacyOpenAI: codersdk.AIBridgeOpenAIConfig{ + BaseURL: serpent.String("https://api.openai.com/v1"), + Key: serpent.String("sk-original"), + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + row, err := db.GetAIProviderByName(ctx, "openai") + require.NoError(t, err) + require.NoError(t, db.DeleteAIProviderByID(ctx, row.ID)) + + // Re-run seed; the soft-deleted row should remain soft-deleted + // and no new row should be created. + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + all, err := db.GetAIProviders(ctx, database.GetAIProvidersParams{}) + require.NoError(t, err) + require.Empty(t, all, "expected no active rows after soft-delete + re-seed") + }) + + t.Run("ExistingKeysArePreserved", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + cfg := codersdk.AIBridgeConfig{ + LegacyOpenAI: codersdk.AIBridgeOpenAIConfig{ + BaseURL: serpent.String("https://api.openai.com/v1"), + Key: serpent.String("sk-original"), + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + row, err := db.GetAIProviderByName(ctx, "openai") + require.NoError(t, err) + + // Operator rotates the env key. The seed must not duplicate + // keys on a row that already exists; the new key is only + // installed via the API/CRUD layer in this flow. + cfg.LegacyOpenAI.Key = serpent.String("sk-rotated") + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + keys, err := db.GetAIProviderKeysByProviderID(ctx, row.ID) + require.NoError(t, err) + require.Len(t, keys, 1, "env reseed must not duplicate keys on existing rows") + require.Equal(t, "sk-original", keys[0].APIKey) + }) + + t.Run("IndexedDuplicateNameMatchingHashDedupes", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + // Two entries under the same name with identical canonical + // fields are deduplicated silently. + cfg := codersdk.AIBridgeConfig{ + Providers: []codersdk.AIProviderConfig{ + { + Type: "openai", + Name: "shared", + BaseURL: "https://api.openai.com/v1", + Keys: []string{"sk-1"}, + }, + { + Type: "openai", + Name: "shared", + BaseURL: "https://api.openai.com/v1", + Keys: []string{"sk-1"}, + }, + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t))) + + all, err := db.GetAIProviders(ctx, database.GetAIProvidersParams{}) + require.NoError(t, err) + require.Len(t, all, 1, "duplicate indexed entries with matching hash must produce a single row") + }) + + t.Run("IndexedDuplicateNameMismatchingHashFails", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + auditor := audit.NewMock() + + // Same name, different canonical fields: must be rejected. + cfg := codersdk.AIBridgeConfig{ + Providers: []codersdk.AIProviderConfig{ + { + Type: "openai", + Name: "shared", + BaseURL: "https://api.openai.com/v1", + Keys: []string{"sk-1"}, + }, + { + Type: "openai", + Name: "shared", + BaseURL: "https://api.openai.com/v2", + Keys: []string{"sk-2"}, + }, + }, + } + err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, auditor, testLogger(t)) + require.Error(t, err) + require.Contains(t, err.Error(), "conflicting fields") + }) +} + +func testLogger(t *testing.T) slog.Logger { + t.Helper() + return slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) +} diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index 4ce6720e49..a76ced7ceb 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -455,6 +455,7 @@ var ( rbac.ResourceOauth2App.Type: {policy.ActionCreate, policy.ActionRead, policy.ActionUpdate, policy.ActionDelete}, rbac.ResourceOauth2AppSecret.Type: {policy.ActionCreate, policy.ActionRead, policy.ActionUpdate, policy.ActionDelete}, rbac.ResourceChat.Type: {policy.ActionCreate, policy.ActionRead, policy.ActionUpdate, policy.ActionDelete}, + rbac.ResourceAIProvider.Type: {policy.ActionCreate, policy.ActionRead, policy.ActionUpdate, policy.ActionDelete}, }), User: []rbac.Permission{}, ByOrgID: map[string]rbac.OrgPermissions{}, diff --git a/coderd/database/lock.go b/coderd/database/lock.go index 41505a2b99..8d0894abc8 100644 --- a/coderd/database/lock.go +++ b/coderd/database/lock.go @@ -15,6 +15,7 @@ const ( LockIDReconcilePrebuilds LockIDReconcileSystemRoles LockIDBoundaryUsageStats + LockIDAIProvidersEnvSeed ) // GenLockID generates a unique and consistent lock ID from a given string. diff --git a/codersdk/aiproviders_bedrock.go b/codersdk/aiproviders_bedrock.go index 6d7a028248..88edcb0017 100644 --- a/codersdk/aiproviders_bedrock.go +++ b/codersdk/aiproviders_bedrock.go @@ -32,6 +32,62 @@ type AIProviderBedrockSettings struct { AccessKeySecret *string `json:"access_key_secret,omitempty"` } +// IsConfigured reports whether any load-bearing Bedrock field is set, +// indicating that the operator wants the provider to authenticate via +// AWS Bedrock rather than as a bearer-token Anthropic provider. +// +// Model and SmallFastModel are intentionally excluded: they have +// deployment-level defaults declared in codersdk/deployment.go, so +// they're always non-empty in a real deployment and cannot serve as +// a detection signal. Region and credentials have no defaults and +// therefore reliably indicate operator intent. Credentials alone are +// not required because Bedrock can also authenticate via the AWS +// environment (instance profile, AWS_PROFILE, IRSA, etc.). +func (b AIProviderBedrockSettings) IsConfigured() bool { + if b.Region != "" { + return true + } + if b.AccessKey != nil && *b.AccessKey != "" { + return true + } + if b.AccessKeySecret != nil && *b.AccessKeySecret != "" { + return true + } + return false +} + +// NewAIProviderBedrockSettings builds an AIProviderBedrockSettings, +// promoting non-empty credential strings to pointers so callers don't +// have to repeat the "set field iff non-empty" boilerplate. Empty +// credentials are left nil, matching the PATCH-omit semantics of the +// pointer-typed fields. +func NewAIProviderBedrockSettings(region, accessKey, accessKeySecret, model, smallFastModel string) AIProviderBedrockSettings { + s := AIProviderBedrockSettings{ + Region: region, + Model: model, + SmallFastModel: smallFastModel, + } + if accessKey != "" { + s.AccessKey = &accessKey + } + if accessKeySecret != "" { + s.AccessKeySecret = &accessKeySecret + } + return s +} + +// IsBedrockConfigured reports whether the combination of the parent +// provider's BaseURL and AIProviderBedrockSettings indicates a Bedrock +// provider. BaseURL alone (e.g. a custom VPC or FIPS endpoint with +// credentials resolved via the AWS environment) is sufficient. +// +// Use this rather than AIProviderBedrockSettings.IsConfigured() when +// BaseURL is available; the seed, the runtime config builder, and the +// legacy validator must all agree on what counts as a Bedrock provider. +func IsBedrockConfigured(baseURL string, b AIProviderBedrockSettings) bool { + return baseURL != "" || b.IsConfigured() +} + func (AIProviderBedrockSettings) settingsType() string { return AIProviderSettingsTypeBedrock } diff --git a/enterprise/cli/aibridged.go b/enterprise/cli/aibridged.go index 4089eda7e0..038a1416b2 100644 --- a/enterprise/cli/aibridged.go +++ b/enterprise/cli/aibridged.go @@ -174,9 +174,6 @@ func buildProviders(cfg codersdk.AIBridgeConfig) ([]aibridge.Provider, error) { // AIProviderConfig into an aibridge AWSBedrockConfig. // Returns nil if no Bedrock fields are set. func bedrockConfigFromProvider(p codersdk.AIProviderConfig) *aibridge.AWSBedrockConfig { - if p.BedrockRegion == "" && p.BedrockBaseURL == "" && len(p.BedrockAccessKeys) == 0 && len(p.BedrockAccessKeySecrets) == 0 { - return nil - } // Currently, only the first key pair is used, if any. // TODO(ssncferreira): pass a keypool.Pool instead. var accessKey, accessKeySecret string @@ -186,6 +183,13 @@ func bedrockConfigFromProvider(p codersdk.AIProviderConfig) *aibridge.AWSBedrock if len(p.BedrockAccessKeySecrets) > 0 { accessKeySecret = p.BedrockAccessKeySecrets[0] } + settings := codersdk.NewAIProviderBedrockSettings( + p.BedrockRegion, accessKey, accessKeySecret, + p.BedrockModel, p.BedrockSmallFastModel, + ) + if !codersdk.IsBedrockConfigured(p.BedrockBaseURL, settings) { + return nil + } return &aibridge.AWSBedrockConfig{ BaseURL: p.BedrockBaseURL, Region: p.BedrockRegion, @@ -197,11 +201,17 @@ func bedrockConfigFromProvider(p codersdk.AIProviderConfig) *aibridge.AWSBedrock } func getBedrockConfig(cfg codersdk.AIBridgeBedrockConfig) *aibridge.AWSBedrockConfig { - // Bedrock is considered disabled when no region or base URL is configured. - // Static credentials are optional. When not provided, the AWS SDK default - // credential chain resolves credentials (environment variables, shared config, - // IAM roles, etc.). - if cfg.Region.String() == "" && cfg.BaseURL.String() == "" { + // codersdk.IsBedrockConfigured decides what counts as Bedrock; when + // it returns false, the AWS SDK default credential chain (env vars, + // shared config, IAM roles, etc.) is left to resolve credentials. + settings := codersdk.NewAIProviderBedrockSettings( + cfg.Region.String(), + cfg.AccessKey.String(), + cfg.AccessKeySecret.String(), + cfg.Model.String(), + cfg.SmallFastModel.String(), + ) + if !codersdk.IsBedrockConfigured(cfg.BaseURL.String(), settings) { return nil } diff --git a/enterprise/cli/server.go b/enterprise/cli/server.go index 23b5c6b57e..8f47431884 100644 --- a/enterprise/cli/server.go +++ b/enterprise/cli/server.go @@ -162,7 +162,9 @@ func (r *RootCmd) Server(_ func()) *serpent.Command { closers.Add(usageCron) // Build the provider list and start AI Bridge daemons only when - // at least one of the bridge or proxy features is enabled. + // at least one of the bridge or proxy features is enabled. The + // ai_providers env-seed runs unconditionally in the AGPL + // codepath, regardless of whether bridge or proxy are enabled. bridgeEnabled := options.DeploymentValues.AI.BridgeConfig.Enabled.Value() proxyEnabled := options.DeploymentValues.AI.BridgeProxyConfig.Enabled.Value() if bridgeEnabled || proxyEnabled {