From 110210d7c96dbd5c843bd7d817104d521ea38407 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 29 May 2026 15:14:55 +0200 Subject: [PATCH] fix(coderd): block ai provider env key drift (#25849) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, `SeedAIProvidersFromEnv` only hashed provider-level fields, so env var key changes were silently ignored once a provider already existed in the database. Include bearer keys and Bedrock credentials in the canonical drift hash, and cover multi-key, multi-provider cases so restarts now fail loudly when the configured credentials no longer match what is stored. When changing a key, you'll now see this in the server startup logs: ``` 2026-05-29 12:29:02.674 [info] api: Encountered an error running "coder server", see "coder server --help" for more information 2026-05-29 12:29:02.674 [info] api: error: create coder API: 2026-05-29 12:29:02.674 [info] api: github.com/coder/coder/v2/cli.(*RootCmd).Server.func2 2026-05-29 12:29:02.674 [info] api: /home/coder/coder/cli/server.go:1015 2026-05-29 12:29:02.674 [info] api: - seed ai providers from env: 2026-05-29 12:29:02.674 [info] api: github.com/coder/coder/v2/enterprise/cli.(*RootCmd).Server.func1 2026-05-29 12:29:02.674 [info] api: /home/coder/coder/enterprise/cli/server.go:187 2026-05-29 12:29:02.674 [info] api: - execute transaction: 2026-05-29 12:29:02.674 [info] api: github.com/coder/coder/v2/coderd/database.(*sqlQuerier).runTx 2026-05-29 12:29:02.674 [info] api: /home/coder/coder/coderd/database/db.go:212 ---> 2026-05-29 12:29:02.674 [info] api: - AI provider "vercel" already exists in the database and differs from the current environment configuration; update the provider through the API or remove the CODER_AIBRIDGE_* env vars to stop seeding it: 2026-05-29 12:29:02.674 [info] api: github.com/coder/coder/v2/coderd.SeedAIProvidersFromEnv.func1 2026-05-29 12:29:02.674 [info] api: /home/coder/coder/coderd/ai_providers_migrate.go:139 2026-05-29 12:29:02.674 [info] api: slogjson: failed to write entry: io: read/write on closed pipe 2026-05-29 12:29:02.700 [info] dlv: Stop reason: exited 2026-05-29 12:29:02.825 [info] site:  ELIFECYCLE  Command failed. error: running command "develop": server did not become ready in 1m0s: main.waitForHealthy /home/coder/coder/scripts/develop/main.go:877 - context canceled ``` _This PR was generated with Coder Agents._ --- coderd/ai_providers_migrate.go | 50 +++++++++-- coderd/ai_providers_migrate_test.go | 134 +++++++++++++++++++++++----- 2 files changed, 157 insertions(+), 27 deletions(-) diff --git a/coderd/ai_providers_migrate.go b/coderd/ai_providers_migrate.go index 6bc99ad840..055877ecce 100644 --- a/coderd/ai_providers_migrate.go +++ b/coderd/ai_providers_migrate.go @@ -116,10 +116,21 @@ func SeedAIProvidersFromEnv( if err != nil { return xerrors.Errorf("decode existing settings for %q: %w", dp.Name, err) } + // Load existing bearer keys so the canonical hash + // includes credentials for comparison. + existingKeyRows, err := tx.GetAIProviderKeysByProviderID(sysCtx, existing.ID) + if err != nil { + return xerrors.Errorf("load existing keys for %q: %w", dp.Name, err) + } + existingKeys := make([]string, 0, len(existingKeyRows)) + for _, k := range existingKeyRows { + existingKeys = append(existingKeys, k.APIKey) + } existingDP := desiredAIProvider{ Type: existing.Type, BaseURL: existing.BaseUrl, Bedrock: existingSettings.Bedrock, + Keys: existingKeys, } existingHash := computeProviderHash(existingDP.canonical()) if existingHash == dp.Hash { @@ -196,18 +207,15 @@ func SeedAIProvidersFromEnv( // canonicalAIProvider is the shape we hash to detect drift between the // configured environment and the row stored in the database. The fields // we hash are exactly the operator-controllable inputs that affect -// runtime behavior. Credentials are intentionally NOT part of the hash -// so operators can rotate them via the API without forcing a server -// restart. This applies to both bearer API keys (stored in -// ai_provider_keys) and to Bedrock access key/secret pairs (stored in -// the settings blob because Bedrock authenticates via settings rather -// than a bearer token). +// runtime behavior, including credentials. +// // Model and SmallFastModel are excluded: they're tunables, and their // serpent defaults shift across releases. type canonicalAIProvider struct { Type string `json:"type"` BaseURL string `json:"base_url"` BedrockRegion string `json:"bedrock_region"` + KeysHash string `json:"keys_hash"` } // desiredAIProvider is a normalized provider description sourced from @@ -235,9 +243,39 @@ func (d desiredAIProvider) canonical() canonicalAIProvider { if d.Bedrock != nil { c.BedrockRegion = d.Bedrock.Region } + c.KeysHash = computeKeysHash(d.Keys, d.Bedrock) return c } +// computeKeysHash produces a deterministic hash over the bearer API +// keys and, for Bedrock providers, the access key and secret. +func computeKeysHash(bearerKeys []string, bedrock *codersdk.AIProviderBedrockSettings) string { + // Collect all credential material in a deterministic order. + // Bearer keys are sorted so reordering in env vars does not + // trigger a false-positive drift. + sorted := make([]string, len(bearerKeys)) + copy(sorted, bearerKeys) + slices.Sort(sorted) + + h := sha256.New() + for _, k := range sorted { + _, _ = h.Write([]byte(k)) + // Separator so "ab"+"c" != "a"+"bc". + _, _ = h.Write([]byte{0}) + } + if bedrock != nil { + if bedrock.AccessKey != nil { + _, _ = h.Write([]byte(*bedrock.AccessKey)) + } + _, _ = h.Write([]byte{0}) + if bedrock.AccessKeySecret != nil { + _, _ = h.Write([]byte(*bedrock.AccessKeySecret)) + } + _, _ = h.Write([]byte{0}) + } + return hex.EncodeToString(h.Sum(nil)) +} + func computeProviderHash(c canonicalAIProvider) string { // json.Marshal is deterministic for structs because field order is // fixed by the struct definition. diff --git a/coderd/ai_providers_migrate_test.go b/coderd/ai_providers_migrate_test.go index d4a07bfdc2..89165002b0 100644 --- a/coderd/ai_providers_migrate_test.go +++ b/coderd/ai_providers_migrate_test.go @@ -91,21 +91,23 @@ func TestSeedAIProvidersFromEnv(t *testing.T) { } require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))) - // Changing the API key alone does NOT count as drift: keys - // live in a separate table and operators rotate them via the - // API. Only changes to non-credential provider-level fields - // (base_url, type, Bedrock region/model) trip the drift check. + // Changing the API key counts as drift: keys are included + // in the canonical hash so operators notice when env-var + // credential changes are ignored by an existing provider. cfg.LegacyOpenAI.Key = serpent.String("sk-rotated") - require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))) - - // Changing the base URL is real drift. - cfg.LegacyOpenAI.BaseURL = serpent.String("https://api.openai.com/v2") err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)) require.Error(t, err) require.Contains(t, err.Error(), "differs from the current environment configuration") + + // Changing the base URL is also real drift. + cfg.LegacyOpenAI.Key = serpent.String("sk-original") + cfg.LegacyOpenAI.BaseURL = serpent.String("https://api.openai.com/v2") + err = coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)) + require.Error(t, err) + require.Contains(t, err.Error(), "differs from the current environment configuration") }) - t.Run("BedrockCredentialRotationIsNotDrift", func(t *testing.T) { + t.Run("BedrockCredentialChangeIsDrift", func(t *testing.T) { t.Parallel() db, _ := dbtestutil.NewDB(t) ctx := testutil.Context(t, testutil.WaitShort) @@ -120,17 +122,20 @@ func TestSeedAIProvidersFromEnv(t *testing.T) { } require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))) - // Rotating the Bedrock access key and secret in env must NOT - // trip the drift check: they're credentials, equivalent to - // bearer API keys, and operators rotate them via the API. + // Rotating the Bedrock access key in env trips the drift + // check so operators know the change did not take effect. cfg.LegacyBedrock.AccessKey = serpent.String("AKIA-rotated") cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret-rotated") - require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))) + err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)) + require.Error(t, err) + require.Contains(t, err.Error(), "differs from the current environment configuration") // Changing the Bedrock region (a non-credential field) is - // real drift. + // also real drift. + cfg.LegacyBedrock.AccessKey = serpent.String("AKIA-original") + cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret-original") cfg.LegacyBedrock.Region = serpent.String("us-west-2") - err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)) + err = coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)) require.Error(t, err) require.Contains(t, err.Error(), "differs from the current environment configuration") }) @@ -293,6 +298,57 @@ func TestSeedAIProvidersFromEnv(t *testing.T) { require.Equal(t, "sk-ant-1", anKeys[0].APIKey) }) + t.Run("IndexedProvidersKeyDriftWithMultipleKeysAndProviders", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + + cfg := codersdk.AIBridgeConfig{ + Providers: []codersdk.AIProviderConfig{ + { + Type: "openai", + Name: "primary-openai", + BaseURL: "https://api.openai.com/v1", + Keys: []string{"sk-openai-1", "sk-openai-2"}, + }, + { + Type: "anthropic", + Name: "primary-anthropic", + BaseURL: "https://api.anthropic.com/", + Keys: []string{"sk-ant-1", "sk-ant-2"}, + }, + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))) + + // Reordering keys must not count as drift. The canonical hash + // sorts keys before hashing, so equivalent key sets remain + // stable across restarts. + cfg.Providers[0].Keys = []string{"sk-openai-2", "sk-openai-1"} + cfg.Providers[1].Keys = []string{"sk-ant-2", "sk-ant-1"} + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))) + + // Changing one key on one provider must block startup even + // when multiple providers are configured. + cfg.Providers[1].Keys = []string{"sk-ant-2", "sk-ant-rotated"} + err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)) + require.Error(t, err) + require.Contains(t, err.Error(), "differs from the current environment configuration") + require.Contains(t, err.Error(), `"primary-anthropic"`) + + oa, err := db.GetAIProviderByName(ctx, "primary-openai") + require.NoError(t, err) + oaKeys, err := db.GetAIProviderKeysByProviderID(ctx, oa.ID) + require.NoError(t, err) + require.ElementsMatch(t, []string{"sk-openai-1", "sk-openai-2"}, []string{oaKeys[0].APIKey, oaKeys[1].APIKey}) + + an, err := db.GetAIProviderByName(ctx, "primary-anthropic") + require.NoError(t, err) + anKeys, err := db.GetAIProviderKeysByProviderID(ctx, an.ID) + require.NoError(t, err) + require.ElementsMatch(t, []string{"sk-ant-1", "sk-ant-2"}, []string{anKeys[0].APIKey, anKeys[1].APIKey}) + }) + t.Run("BedrockIndexedProviderHasNoKeys", func(t *testing.T) { t.Parallel() db, _ := dbtestutil.NewDB(t) @@ -424,7 +480,7 @@ func TestSeedAIProvidersFromEnv(t *testing.T) { require.Empty(t, all, "expected no active rows after soft-delete + re-seed") }) - t.Run("ExistingKeysArePreserved", func(t *testing.T) { + t.Run("ExistingKeysBlockOnDrift", func(t *testing.T) { t.Parallel() db, _ := dbtestutil.NewDB(t) ctx := testutil.Context(t, testutil.WaitShort) @@ -440,15 +496,17 @@ func TestSeedAIProvidersFromEnv(t *testing.T) { row, err := db.GetAIProviderByName(ctx, "openai") require.NoError(t, err) - // Operator rotates the env key. The seed must not duplicate - // keys on a row that already exists; the new key is only - // installed via the API/CRUD layer in this flow. + // Operator rotates the env key. The seed now blocks startup + // because the keys differ, alerting the operator. cfg.LegacyOpenAI.Key = serpent.String("sk-rotated") - require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))) + err = coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)) + require.Error(t, err) + require.Contains(t, err.Error(), "differs from the current environment configuration") + // The original key is still in the database. keys, err := db.GetAIProviderKeysByProviderID(ctx, row.ID) require.NoError(t, err) - require.Len(t, keys, 1, "env reseed must not duplicate keys on existing rows") + require.Len(t, keys, 1) require.Equal(t, "sk-original", keys[0].APIKey) }) @@ -482,6 +540,40 @@ func TestSeedAIProvidersFromEnv(t *testing.T) { require.Len(t, all, 1, "duplicate indexed entries with matching hash must produce a single row") }) + t.Run("IndexedDuplicateNameMatchingHashDedupesReorderedKeys", func(t *testing.T) { + t.Parallel() + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + + // Key order should not affect the canonical hash. Reordered + // duplicates under the same name should still dedupe. + cfg := codersdk.AIBridgeConfig{ + Providers: []codersdk.AIProviderConfig{ + { + Type: "openai", + Name: "shared", + BaseURL: "https://api.openai.com/v1", + Keys: []string{"sk-1", "sk-2"}, + }, + { + Type: "openai", + Name: "shared", + BaseURL: "https://api.openai.com/v1", + Keys: []string{"sk-2", "sk-1"}, + }, + }, + } + require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))) + + all, err := db.GetAIProviders(ctx, database.GetAIProvidersParams{}) + require.NoError(t, err) + require.Len(t, all, 1) + keys, err := db.GetAIProviderKeysByProviderID(ctx, all[0].ID) + require.NoError(t, err) + require.Len(t, keys, 2) + require.ElementsMatch(t, []string{"sk-1", "sk-2"}, []string{keys[0].APIKey, keys[1].APIKey}) + }) + t.Run("IndexedDuplicateNameMismatchingHashFails", func(t *testing.T) { t.Parallel() db, _ := dbtestutil.NewDB(t)