mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
fix(coderd): block ai provider env key drift (#25849)
Previously, `SeedAIProvidersFromEnv` only hashed provider-level fields,
so env var key changes were silently ignored once a provider already
existed in the database.
Include bearer keys and Bedrock credentials in the canonical drift hash,
and cover multi-key, multi-provider cases so restarts now fail loudly
when the configured credentials no longer match what is stored.
When changing a key, you'll now see this in the server startup logs:
```
2026-05-29 12:29:02.674 [info] api: Encountered an error running "coder server", see "coder server --help" for more information
2026-05-29 12:29:02.674 [info] api: error: create coder API:
2026-05-29 12:29:02.674 [info] api: github.com/coder/coder/v2/cli.(*RootCmd).Server.func2
2026-05-29 12:29:02.674 [info] api: /home/coder/coder/cli/server.go:1015
2026-05-29 12:29:02.674 [info] api: - seed ai providers from env:
2026-05-29 12:29:02.674 [info] api: github.com/coder/coder/v2/enterprise/cli.(*RootCmd).Server.func1
2026-05-29 12:29:02.674 [info] api: /home/coder/coder/enterprise/cli/server.go:187
2026-05-29 12:29:02.674 [info] api: - execute transaction:
2026-05-29 12:29:02.674 [info] api: github.com/coder/coder/v2/coderd/database.(*sqlQuerier).runTx
2026-05-29 12:29:02.674 [info] api: /home/coder/coder/coderd/database/db.go:212
---> 2026-05-29 12:29:02.674 [info] api: - AI provider "vercel" already exists in the database and differs from the current environment configuration; update the provider through the API or remove the CODER_AIBRIDGE_* env vars to stop seeding it:
2026-05-29 12:29:02.674 [info] api: github.com/coder/coder/v2/coderd.SeedAIProvidersFromEnv.func1
2026-05-29 12:29:02.674 [info] api: /home/coder/coder/coderd/ai_providers_migrate.go:139
2026-05-29 12:29:02.674 [info] api: slogjson: failed to write entry: io: read/write on closed pipe
2026-05-29 12:29:02.700 [info] dlv: Stop reason: exited
2026-05-29 12:29:02.825 [info] site: ELIFECYCLE Command failed.
error: running command "develop": server did not become ready in 1m0s:
main.waitForHealthy
/home/coder/coder/scripts/develop/main.go:877
- context canceled
```
_This PR was generated with Coder Agents._
This commit is contained in:
@@ -116,10 +116,21 @@ func SeedAIProvidersFromEnv(
|
||||
if err != nil {
|
||||
return xerrors.Errorf("decode existing settings for %q: %w", dp.Name, err)
|
||||
}
|
||||
// Load existing bearer keys so the canonical hash
|
||||
// includes credentials for comparison.
|
||||
existingKeyRows, err := tx.GetAIProviderKeysByProviderID(sysCtx, existing.ID)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("load existing keys for %q: %w", dp.Name, err)
|
||||
}
|
||||
existingKeys := make([]string, 0, len(existingKeyRows))
|
||||
for _, k := range existingKeyRows {
|
||||
existingKeys = append(existingKeys, k.APIKey)
|
||||
}
|
||||
existingDP := desiredAIProvider{
|
||||
Type: existing.Type,
|
||||
BaseURL: existing.BaseUrl,
|
||||
Bedrock: existingSettings.Bedrock,
|
||||
Keys: existingKeys,
|
||||
}
|
||||
existingHash := computeProviderHash(existingDP.canonical())
|
||||
if existingHash == dp.Hash {
|
||||
@@ -196,18 +207,15 @@ func SeedAIProvidersFromEnv(
|
||||
// canonicalAIProvider is the shape we hash to detect drift between the
|
||||
// configured environment and the row stored in the database. The fields
|
||||
// we hash are exactly the operator-controllable inputs that affect
|
||||
// runtime behavior. Credentials are intentionally NOT part of the hash
|
||||
// so operators can rotate them via the API without forcing a server
|
||||
// restart. This applies to both bearer API keys (stored in
|
||||
// ai_provider_keys) and to Bedrock access key/secret pairs (stored in
|
||||
// the settings blob because Bedrock authenticates via settings rather
|
||||
// than a bearer token).
|
||||
// runtime behavior, including credentials.
|
||||
//
|
||||
// Model and SmallFastModel are excluded: they're tunables, and their
|
||||
// serpent defaults shift across releases.
|
||||
type canonicalAIProvider struct {
|
||||
Type string `json:"type"`
|
||||
BaseURL string `json:"base_url"`
|
||||
BedrockRegion string `json:"bedrock_region"`
|
||||
KeysHash string `json:"keys_hash"`
|
||||
}
|
||||
|
||||
// desiredAIProvider is a normalized provider description sourced from
|
||||
@@ -235,9 +243,39 @@ func (d desiredAIProvider) canonical() canonicalAIProvider {
|
||||
if d.Bedrock != nil {
|
||||
c.BedrockRegion = d.Bedrock.Region
|
||||
}
|
||||
c.KeysHash = computeKeysHash(d.Keys, d.Bedrock)
|
||||
return c
|
||||
}
|
||||
|
||||
// computeKeysHash produces a deterministic hash over the bearer API
|
||||
// keys and, for Bedrock providers, the access key and secret.
|
||||
func computeKeysHash(bearerKeys []string, bedrock *codersdk.AIProviderBedrockSettings) string {
|
||||
// Collect all credential material in a deterministic order.
|
||||
// Bearer keys are sorted so reordering in env vars does not
|
||||
// trigger a false-positive drift.
|
||||
sorted := make([]string, len(bearerKeys))
|
||||
copy(sorted, bearerKeys)
|
||||
slices.Sort(sorted)
|
||||
|
||||
h := sha256.New()
|
||||
for _, k := range sorted {
|
||||
_, _ = h.Write([]byte(k))
|
||||
// Separator so "ab"+"c" != "a"+"bc".
|
||||
_, _ = h.Write([]byte{0})
|
||||
}
|
||||
if bedrock != nil {
|
||||
if bedrock.AccessKey != nil {
|
||||
_, _ = h.Write([]byte(*bedrock.AccessKey))
|
||||
}
|
||||
_, _ = h.Write([]byte{0})
|
||||
if bedrock.AccessKeySecret != nil {
|
||||
_, _ = h.Write([]byte(*bedrock.AccessKeySecret))
|
||||
}
|
||||
_, _ = h.Write([]byte{0})
|
||||
}
|
||||
return hex.EncodeToString(h.Sum(nil))
|
||||
}
|
||||
|
||||
func computeProviderHash(c canonicalAIProvider) string {
|
||||
// json.Marshal is deterministic for structs because field order is
|
||||
// fixed by the struct definition.
|
||||
|
||||
@@ -91,21 +91,23 @@ func TestSeedAIProvidersFromEnv(t *testing.T) {
|
||||
}
|
||||
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
|
||||
|
||||
// Changing the API key alone does NOT count as drift: keys
|
||||
// live in a separate table and operators rotate them via the
|
||||
// API. Only changes to non-credential provider-level fields
|
||||
// (base_url, type, Bedrock region/model) trip the drift check.
|
||||
// Changing the API key counts as drift: keys are included
|
||||
// in the canonical hash so operators notice when env-var
|
||||
// credential changes are ignored by an existing provider.
|
||||
cfg.LegacyOpenAI.Key = serpent.String("sk-rotated")
|
||||
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
|
||||
|
||||
// Changing the base URL is real drift.
|
||||
cfg.LegacyOpenAI.BaseURL = serpent.String("https://api.openai.com/v2")
|
||||
err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "differs from the current environment configuration")
|
||||
|
||||
// Changing the base URL is also real drift.
|
||||
cfg.LegacyOpenAI.Key = serpent.String("sk-original")
|
||||
cfg.LegacyOpenAI.BaseURL = serpent.String("https://api.openai.com/v2")
|
||||
err = coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "differs from the current environment configuration")
|
||||
})
|
||||
|
||||
t.Run("BedrockCredentialRotationIsNotDrift", func(t *testing.T) {
|
||||
t.Run("BedrockCredentialChangeIsDrift", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
@@ -120,17 +122,20 @@ func TestSeedAIProvidersFromEnv(t *testing.T) {
|
||||
}
|
||||
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
|
||||
|
||||
// Rotating the Bedrock access key and secret in env must NOT
|
||||
// trip the drift check: they're credentials, equivalent to
|
||||
// bearer API keys, and operators rotate them via the API.
|
||||
// Rotating the Bedrock access key in env trips the drift
|
||||
// check so operators know the change did not take effect.
|
||||
cfg.LegacyBedrock.AccessKey = serpent.String("AKIA-rotated")
|
||||
cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret-rotated")
|
||||
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
|
||||
err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "differs from the current environment configuration")
|
||||
|
||||
// Changing the Bedrock region (a non-credential field) is
|
||||
// real drift.
|
||||
// also real drift.
|
||||
cfg.LegacyBedrock.AccessKey = serpent.String("AKIA-original")
|
||||
cfg.LegacyBedrock.AccessKeySecret = serpent.String("secret-original")
|
||||
cfg.LegacyBedrock.Region = serpent.String("us-west-2")
|
||||
err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
|
||||
err = coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "differs from the current environment configuration")
|
||||
})
|
||||
@@ -293,6 +298,57 @@ func TestSeedAIProvidersFromEnv(t *testing.T) {
|
||||
require.Equal(t, "sk-ant-1", anKeys[0].APIKey)
|
||||
})
|
||||
|
||||
t.Run("IndexedProvidersKeyDriftWithMultipleKeysAndProviders", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
|
||||
cfg := codersdk.AIBridgeConfig{
|
||||
Providers: []codersdk.AIProviderConfig{
|
||||
{
|
||||
Type: "openai",
|
||||
Name: "primary-openai",
|
||||
BaseURL: "https://api.openai.com/v1",
|
||||
Keys: []string{"sk-openai-1", "sk-openai-2"},
|
||||
},
|
||||
{
|
||||
Type: "anthropic",
|
||||
Name: "primary-anthropic",
|
||||
BaseURL: "https://api.anthropic.com/",
|
||||
Keys: []string{"sk-ant-1", "sk-ant-2"},
|
||||
},
|
||||
},
|
||||
}
|
||||
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
|
||||
|
||||
// Reordering keys must not count as drift. The canonical hash
|
||||
// sorts keys before hashing, so equivalent key sets remain
|
||||
// stable across restarts.
|
||||
cfg.Providers[0].Keys = []string{"sk-openai-2", "sk-openai-1"}
|
||||
cfg.Providers[1].Keys = []string{"sk-ant-2", "sk-ant-1"}
|
||||
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
|
||||
|
||||
// Changing one key on one provider must block startup even
|
||||
// when multiple providers are configured.
|
||||
cfg.Providers[1].Keys = []string{"sk-ant-2", "sk-ant-rotated"}
|
||||
err := coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "differs from the current environment configuration")
|
||||
require.Contains(t, err.Error(), `"primary-anthropic"`)
|
||||
|
||||
oa, err := db.GetAIProviderByName(ctx, "primary-openai")
|
||||
require.NoError(t, err)
|
||||
oaKeys, err := db.GetAIProviderKeysByProviderID(ctx, oa.ID)
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, []string{"sk-openai-1", "sk-openai-2"}, []string{oaKeys[0].APIKey, oaKeys[1].APIKey})
|
||||
|
||||
an, err := db.GetAIProviderByName(ctx, "primary-anthropic")
|
||||
require.NoError(t, err)
|
||||
anKeys, err := db.GetAIProviderKeysByProviderID(ctx, an.ID)
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, []string{"sk-ant-1", "sk-ant-2"}, []string{anKeys[0].APIKey, anKeys[1].APIKey})
|
||||
})
|
||||
|
||||
t.Run("BedrockIndexedProviderHasNoKeys", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
@@ -424,7 +480,7 @@ func TestSeedAIProvidersFromEnv(t *testing.T) {
|
||||
require.Empty(t, all, "expected no active rows after soft-delete + re-seed")
|
||||
})
|
||||
|
||||
t.Run("ExistingKeysArePreserved", func(t *testing.T) {
|
||||
t.Run("ExistingKeysBlockOnDrift", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
@@ -440,15 +496,17 @@ func TestSeedAIProvidersFromEnv(t *testing.T) {
|
||||
row, err := db.GetAIProviderByName(ctx, "openai")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Operator rotates the env key. The seed must not duplicate
|
||||
// keys on a row that already exists; the new key is only
|
||||
// installed via the API/CRUD layer in this flow.
|
||||
// Operator rotates the env key. The seed now blocks startup
|
||||
// because the keys differ, alerting the operator.
|
||||
cfg.LegacyOpenAI.Key = serpent.String("sk-rotated")
|
||||
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
|
||||
err = coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t))
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "differs from the current environment configuration")
|
||||
|
||||
// The original key is still in the database.
|
||||
keys, err := db.GetAIProviderKeysByProviderID(ctx, row.ID)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, keys, 1, "env reseed must not duplicate keys on existing rows")
|
||||
require.Len(t, keys, 1)
|
||||
require.Equal(t, "sk-original", keys[0].APIKey)
|
||||
})
|
||||
|
||||
@@ -482,6 +540,40 @@ func TestSeedAIProvidersFromEnv(t *testing.T) {
|
||||
require.Len(t, all, 1, "duplicate indexed entries with matching hash must produce a single row")
|
||||
})
|
||||
|
||||
t.Run("IndexedDuplicateNameMatchingHashDedupesReorderedKeys", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
|
||||
// Key order should not affect the canonical hash. Reordered
|
||||
// duplicates under the same name should still dedupe.
|
||||
cfg := codersdk.AIBridgeConfig{
|
||||
Providers: []codersdk.AIProviderConfig{
|
||||
{
|
||||
Type: "openai",
|
||||
Name: "shared",
|
||||
BaseURL: "https://api.openai.com/v1",
|
||||
Keys: []string{"sk-1", "sk-2"},
|
||||
},
|
||||
{
|
||||
Type: "openai",
|
||||
Name: "shared",
|
||||
BaseURL: "https://api.openai.com/v1",
|
||||
Keys: []string{"sk-2", "sk-1"},
|
||||
},
|
||||
},
|
||||
}
|
||||
require.NoError(t, coderd.SeedAIProvidersFromEnv(ctx, db, cfg, testLogger(t)))
|
||||
|
||||
all, err := db.GetAIProviders(ctx, database.GetAIProvidersParams{})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, all, 1)
|
||||
keys, err := db.GetAIProviderKeysByProviderID(ctx, all[0].ID)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, keys, 2)
|
||||
require.ElementsMatch(t, []string{"sk-1", "sk-2"}, []string{keys[0].APIKey, keys[1].APIKey})
|
||||
})
|
||||
|
||||
t.Run("IndexedDuplicateNameMismatchingHashFails", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
db, _ := dbtestutil.NewDB(t)
|
||||
|
||||
Reference in New Issue
Block a user