fix(coderd): block ai provider env key drift (#25849)

Previously, `SeedAIProvidersFromEnv` only hashed provider-level fields,
so env var key changes were silently ignored once a provider already
existed in the database.

Include bearer keys and Bedrock credentials in the canonical drift hash,
and cover multi-key, multi-provider cases so restarts now fail loudly
when the configured credentials no longer match what is stored.

When changing a key, you'll now see this in the server startup logs:

```
2026-05-29 12:29:02.674 [info]  api: Encountered an error running "coder server", see "coder server --help" for more information
2026-05-29 12:29:02.674 [info]  api: error: create coder API:
2026-05-29 12:29:02.674 [info]  api: github.com/coder/coder/v2/cli.(*RootCmd).Server.func2
2026-05-29 12:29:02.674 [info]  api: /home/coder/coder/cli/server.go:1015
2026-05-29 12:29:02.674 [info]  api: - seed ai providers from env:
2026-05-29 12:29:02.674 [info]  api: github.com/coder/coder/v2/enterprise/cli.(*RootCmd).Server.func1
2026-05-29 12:29:02.674 [info]  api: /home/coder/coder/enterprise/cli/server.go:187
2026-05-29 12:29:02.674 [info]  api: - execute transaction:
2026-05-29 12:29:02.674 [info]  api: github.com/coder/coder/v2/coderd/database.(*sqlQuerier).runTx
2026-05-29 12:29:02.674 [info]  api: /home/coder/coder/coderd/database/db.go:212
---> 2026-05-29 12:29:02.674 [info]  api: - AI provider "vercel" already exists in the database and differs from the current environment configuration; update the provider through the API or remove the CODER_AIBRIDGE_* env vars to stop seeding it:
2026-05-29 12:29:02.674 [info]  api: github.com/coder/coder/v2/coderd.SeedAIProvidersFromEnv.func1
2026-05-29 12:29:02.674 [info]  api: /home/coder/coder/coderd/ai_providers_migrate.go:139
2026-05-29 12:29:02.674 [info]  api: slogjson: failed to write entry: io: read/write on closed pipe
2026-05-29 12:29:02.700 [info]  dlv: Stop reason: exited
2026-05-29 12:29:02.825 [info]  site:  ELIFECYCLE  Command failed.
error: running command "develop": server did not become ready in 1m0s:
    main.waitForHealthy
        /home/coder/coder/scripts/develop/main.go:877
  - context canceled
```

_This PR was generated with Coder Agents._
This commit is contained in:
Danny Kopping
2026-05-29 15:14:55 +02:00
committed by GitHub
parent d0a51da0a9
commit 110210d7c9
2 changed files with 157 additions and 27 deletions
+44 -6
View File
@@ -116,10 +116,21 @@ func SeedAIProvidersFromEnv(
if err != nil {
return xerrors.Errorf("decode existing settings for %q: %w", dp.Name, err)
}
// Load existing bearer keys so the canonical hash
// includes credentials for comparison.
existingKeyRows, err := tx.GetAIProviderKeysByProviderID(sysCtx, existing.ID)
if err != nil {
return xerrors.Errorf("load existing keys for %q: %w", dp.Name, err)
}
existingKeys := make([]string, 0, len(existingKeyRows))
for _, k := range existingKeyRows {
existingKeys = append(existingKeys, k.APIKey)
}
existingDP := desiredAIProvider{
Type: existing.Type,
BaseURL: existing.BaseUrl,
Bedrock: existingSettings.Bedrock,
Keys: existingKeys,
}
existingHash := computeProviderHash(existingDP.canonical())
if existingHash == dp.Hash {
@@ -196,18 +207,15 @@ func SeedAIProvidersFromEnv(
// canonicalAIProvider is the shape we hash to detect drift between the
// configured environment and the row stored in the database. The fields
// we hash are exactly the operator-controllable inputs that affect
// runtime behavior. Credentials are intentionally NOT part of the hash
// so operators can rotate them via the API without forcing a server
// restart. This applies to both bearer API keys (stored in
// ai_provider_keys) and to Bedrock access key/secret pairs (stored in
// the settings blob because Bedrock authenticates via settings rather
// than a bearer token).
// runtime behavior, including credentials.
//
// Model and SmallFastModel are excluded: they're tunables, and their
// serpent defaults shift across releases.
type canonicalAIProvider struct {
Type string `json:"type"`
BaseURL string `json:"base_url"`
BedrockRegion string `json:"bedrock_region"`
KeysHash string `json:"keys_hash"`
}
// desiredAIProvider is a normalized provider description sourced from
@@ -235,9 +243,39 @@ func (d desiredAIProvider) canonical() canonicalAIProvider {
if d.Bedrock != nil {
c.BedrockRegion = d.Bedrock.Region
}
c.KeysHash = computeKeysHash(d.Keys, d.Bedrock)
return c
}
// computeKeysHash produces a deterministic hash over the bearer API
// keys and, for Bedrock providers, the access key and secret.
func computeKeysHash(bearerKeys []string, bedrock *codersdk.AIProviderBedrockSettings) string {
// Collect all credential material in a deterministic order.
// Bearer keys are sorted so reordering in env vars does not
// trigger a false-positive drift.
sorted := make([]string, len(bearerKeys))
copy(sorted, bearerKeys)
slices.Sort(sorted)
h := sha256.New()
for _, k := range sorted {
_, _ = h.Write([]byte(k))
// Separator so "ab"+"c" != "a"+"bc".
_, _ = h.Write([]byte{0})
}
if bedrock != nil {
if bedrock.AccessKey != nil {
_, _ = h.Write([]byte(*bedrock.AccessKey))
}
_, _ = h.Write([]byte{0})
if bedrock.AccessKeySecret != nil {
_, _ = h.Write([]byte(*bedrock.AccessKeySecret))
}
_, _ = h.Write([]byte{0})
}
return hex.EncodeToString(h.Sum(nil))
}
func computeProviderHash(c canonicalAIProvider) string {
// json.Marshal is deterministic for structs because field order is
// fixed by the struct definition.
+113 -21
View File
@@ -91,21 +91,23 @@ func TestSeedAIProvidersFromEnv(t *testing.T) {
}
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
// Changing the API key alone does NOT count as drift: keys
// live in a separate table and operators rotate them via the
// API. Only changes to non-credential provider-level fields
// (base_url, type, Bedrock region/model) trip the drift check.
// Changing the API key counts as drift: keys are included
// in the canonical hash so operators notice when env-var
// credential changes are ignored by an existing provider.
cfg.LegacyOpenAI.Key = serpent.String("sk-rotated")
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
// Changing the base URL is real drift.
cfg.LegacyOpenAI.BaseURL = serpent.String("https://api.openai.com/v2")
err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
require.Error(t, err)
require.Contains(t, err.Error(), "differs from the current environment configuration")
// Changing the base URL is also real drift.
cfg.LegacyOpenAI.Key = serpent.String("sk-original")
cfg.LegacyOpenAI.BaseURL = serpent.String("https://api.openai.com/v2")
err = coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
require.Error(t, err)
require.Contains(t, err.Error(), "differs from the current environment configuration")
})
t.Run("BedrockCredentialRotationIsNotDrift", func(t *testing.T) {
t.Run("BedrockCredentialChangeIsDrift", func(t *testing.T) {
t.Parallel()
db, _ := dbtestutil.NewDB(t)
ctx := testutil.Context(t, testutil.WaitShort)
@@ -120,17 +122,20 @@ func TestSeedAIProvidersFromEnv(t *testing.T) {
}
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
// Rotating the Bedrock access key and secret in env must NOT
// trip the drift check: they're credentials, equivalent to
// bearer API keys, and operators rotate them via the API.
// Rotating the Bedrock access key in env trips the drift
// check so operators know the change did not take effect.
cfg.LegacyBedrock.AccessKey = serpent.String("AKIA-rotated")
cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret-rotated")
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
require.Error(t, err)
require.Contains(t, err.Error(), "differs from the current environment configuration")
// Changing the Bedrock region (a non-credential field) is
// real drift.
// also real drift.
cfg.LegacyBedrock.AccessKey = serpent.String("AKIA-original")
cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret-original")
cfg.LegacyBedrock.Region = serpent.String("us-west-2")
err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
err = coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
require.Error(t, err)
require.Contains(t, err.Error(), "differs from the current environment configuration")
})
@@ -293,6 +298,57 @@ func TestSeedAIProvidersFromEnv(t *testing.T) {
require.Equal(t, "sk-ant-1", anKeys[0].APIKey)
})
t.Run("IndexedProvidersKeyDriftWithMultipleKeysAndProviders", func(t *testing.T) {
t.Parallel()
db, _ := dbtestutil.NewDB(t)
ctx := testutil.Context(t, testutil.WaitShort)
cfg := codersdk.AIBridgeConfig{
Providers: []codersdk.AIProviderConfig{
{
Type: "openai",
Name: "primary-openai",
BaseURL: "https://api.openai.com/v1",
Keys: []string{"sk-openai-1", "sk-openai-2"},
},
{
Type: "anthropic",
Name: "primary-anthropic",
BaseURL: "https://api.anthropic.com/",
Keys: []string{"sk-ant-1", "sk-ant-2"},
},
},
}
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
// Reordering keys must not count as drift. The canonical hash
// sorts keys before hashing, so equivalent key sets remain
// stable across restarts.
cfg.Providers[0].Keys = []string{"sk-openai-2", "sk-openai-1"}
cfg.Providers[1].Keys = []string{"sk-ant-2", "sk-ant-1"}
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
// Changing one key on one provider must block startup even
// when multiple providers are configured.
cfg.Providers[1].Keys = []string{"sk-ant-2", "sk-ant-rotated"}
err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
require.Error(t, err)
require.Contains(t, err.Error(), "differs from the current environment configuration")
require.Contains(t, err.Error(), `"primary-anthropic"`)
oa, err := db.GetAIProviderByName(ctx, "primary-openai")
require.NoError(t, err)
oaKeys, err := db.GetAIProviderKeysByProviderID(ctx, oa.ID)
require.NoError(t, err)
require.ElementsMatch(t, []string{"sk-openai-1", "sk-openai-2"}, []string{oaKeys[0].APIKey, oaKeys[1].APIKey})
an, err := db.GetAIProviderByName(ctx, "primary-anthropic")
require.NoError(t, err)
anKeys, err := db.GetAIProviderKeysByProviderID(ctx, an.ID)
require.NoError(t, err)
require.ElementsMatch(t, []string{"sk-ant-1", "sk-ant-2"}, []string{anKeys[0].APIKey, anKeys[1].APIKey})
})
t.Run("BedrockIndexedProviderHasNoKeys", func(t *testing.T) {
t.Parallel()
db, _ := dbtestutil.NewDB(t)
@@ -424,7 +480,7 @@ func TestSeedAIProvidersFromEnv(t *testing.T) {
require.Empty(t, all, "expected no active rows after soft-delete + re-seed")
})
t.Run("ExistingKeysArePreserved", func(t *testing.T) {
t.Run("ExistingKeysBlockOnDrift", func(t *testing.T) {
t.Parallel()
db, _ := dbtestutil.NewDB(t)
ctx := testutil.Context(t, testutil.WaitShort)
@@ -440,15 +496,17 @@ func TestSeedAIProvidersFromEnv(t *testing.T) {
row, err := db.GetAIProviderByName(ctx, "openai")
require.NoError(t, err)
// Operator rotates the env key. The seed must not duplicate
// keys on a row that already exists; the new key is only
// installed via the API/CRUD layer in this flow.
// Operator rotates the env key. The seed now blocks startup
// because the keys differ, alerting the operator.
cfg.LegacyOpenAI.Key = serpent.String("sk-rotated")
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
err = coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
require.Error(t, err)
require.Contains(t, err.Error(), "differs from the current environment configuration")
// The original key is still in the database.
keys, err := db.GetAIProviderKeysByProviderID(ctx, row.ID)
require.NoError(t, err)
require.Len(t, keys, 1, "env reseed must not duplicate keys on existing rows")
require.Len(t, keys, 1)
require.Equal(t, "sk-original", keys[0].APIKey)
})
@@ -482,6 +540,40 @@ func TestSeedAIProvidersFromEnv(t *testing.T) {
require.Len(t, all, 1, "duplicate indexed entries with matching hash must produce a single row")
})
t.Run("IndexedDuplicateNameMatchingHashDedupesReorderedKeys", func(t *testing.T) {
t.Parallel()
db, _ := dbtestutil.NewDB(t)
ctx := testutil.Context(t, testutil.WaitShort)
// Key order should not affect the canonical hash. Reordered
// duplicates under the same name should still dedupe.
cfg := codersdk.AIBridgeConfig{
Providers: []codersdk.AIProviderConfig{
{
Type: "openai",
Name: "shared",
BaseURL: "https://api.openai.com/v1",
Keys: []string{"sk-1", "sk-2"},
},
{
Type: "openai",
Name: "shared",
BaseURL: "https://api.openai.com/v1",
Keys: []string{"sk-2", "sk-1"},
},
},
}
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
all, err := db.GetAIProviders(ctx, database.GetAIProvidersParams{})
require.NoError(t, err)
require.Len(t, all, 1)
keys, err := db.GetAIProviderKeysByProviderID(ctx, all[0].ID)
require.NoError(t, err)
require.Len(t, keys, 2)
require.ElementsMatch(t, []string{"sk-1", "sk-2"}, []string{keys[0].APIKey, keys[1].APIKey})
})
t.Run("IndexedDuplicateNameMismatchingHashFails", func(t *testing.T) {
t.Parallel()
db, _ := dbtestutil.NewDB(t)