mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
fix: recreate ai_provider_type instead of ADD VALUE (#25895)
Coder runs all migrations in a single transaction (`pgTxnDriver`). Postgres forbids using an enum value added by `ALTER TYPE ... ADD VALUE` within the same transaction that added it. Migration `000499` widened `ai_provider_type` with `ADD VALUE`, and `000504` casts existing `chat_providers` rows to that enum in the same transaction. On deployments with a legacy provider using one of the new values (for example `openai-compat`), the batch failed with `unsafe use of new value` and the server could not start. Recreate the type (create a new enum, alter the column, drop and rename) instead of using `ADD VALUE`, matching the existing precedent in `000144_user_status_dormant`. A freshly created enum's values are usable immediately in the same transaction, so the cast in `000504` succeeds. The resulting schema is identical, so `make gen` produces no `dump.sql` diff and databases that already applied these migrations see no drift. Added a regression test that seeds an `openai-compat` provider and applies `000499` through `000504` in a single transaction, reproducing the production path. The per-step `Stepper` used by the other migration tests commits each migration separately and cannot surface this class of bug. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Signed-off-by: Danny Kopping <danny@coder.com> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -41,6 +41,41 @@
|
||||
dimension.
|
||||
- Avoid `ByX` names for grouped queries.
|
||||
|
||||
### Enum Changes Run in a Single Transaction
|
||||
|
||||
All migrations run inside one transaction (`pgTxnDriver`). Postgres forbids
|
||||
*using* an enum value added by `ALTER TYPE ... ADD VALUE` within the same
|
||||
transaction that added it, so it fails with `unsafe use of new value`.
|
||||
|
||||
Adding the value is fine; using it in the same batch is not. "Using it"
|
||||
includes a later migration that casts to it (`col::my_enum`), inserts or
|
||||
updates a row with it, or sets it as a column default. This only fails when a
|
||||
row actually materializes the new value, so fresh databases and CI pass while
|
||||
deployments with existing data break.
|
||||
|
||||
**MUST DO**: If any migration uses a newly added enum value, recreate the type
|
||||
instead of using `ADD VALUE`. A freshly created enum's values are usable
|
||||
immediately in the same transaction. Precedent: `000144_user_status_dormant`.
|
||||
|
||||
```sql
|
||||
CREATE TYPE new_my_enum AS ENUM ('existing', 'value', 'new_value');
|
||||
|
||||
ALTER TABLE my_table
|
||||
ALTER COLUMN col TYPE new_my_enum USING (col::text::new_my_enum);
|
||||
|
||||
DROP TYPE my_enum;
|
||||
|
||||
ALTER TYPE new_my_enum RENAME TO my_enum;
|
||||
```
|
||||
|
||||
Recreating produces an identical schema, so `make gen` yields no `dump.sql`
|
||||
diff and databases that already applied the migration see no drift.
|
||||
|
||||
**Testing**: `migrations.Stepper` commits each migration separately, so tests
|
||||
built on it cannot surface this. To catch it, seed a row using the new value,
|
||||
then apply the affected migrations in a single transaction (see
|
||||
`TestMigration000504AIProvidersBackfillEnumInSingleTxn`).
|
||||
|
||||
## Handling Nullable Fields
|
||||
|
||||
Use `sql.NullString`, `sql.NullBool`, etc. for optional database fields:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
-- No-op: Postgres does not allow removing enum values safely.
|
||||
-- Matches the precedent in 000495_ai_providers.down.sql for ALTER
|
||||
-- TYPE resource_type / api_key_scope ADD VALUE.
|
||||
-- No-op: the up recreates ai_provider_type with a wider value set, but the
|
||||
-- down does not narrow it back. Narrowing would drop rows that already use the
|
||||
-- new values, and 000495_ai_providers.down.sql drops the type wholesale when
|
||||
-- migrating all the way down.
|
||||
|
||||
@@ -7,9 +7,27 @@
|
||||
-- OpenAI-compatible endpoints. Native gateway-side support for these
|
||||
-- providers comes later, at which point this enum already carries the
|
||||
-- right discriminator and no further migration is needed.
|
||||
ALTER TYPE ai_provider_type ADD VALUE IF NOT EXISTS 'azure';
|
||||
ALTER TYPE ai_provider_type ADD VALUE IF NOT EXISTS 'bedrock';
|
||||
ALTER TYPE ai_provider_type ADD VALUE IF NOT EXISTS 'google';
|
||||
ALTER TYPE ai_provider_type ADD VALUE IF NOT EXISTS 'openai-compat';
|
||||
ALTER TYPE ai_provider_type ADD VALUE IF NOT EXISTS 'openrouter';
|
||||
ALTER TYPE ai_provider_type ADD VALUE IF NOT EXISTS 'vercel';
|
||||
--
|
||||
-- Recreate the type rather than using ALTER TYPE ... ADD VALUE. Postgres
|
||||
-- forbids using a value added by ADD VALUE within the same transaction, and
|
||||
-- all migrations run in one transaction. 000504 casts existing chat_providers
|
||||
-- rows to these new values in that same transaction, so ADD VALUE fails with
|
||||
-- "unsafe use of new value". A freshly created enum's values are usable
|
||||
-- immediately, so the cast in 000504 succeeds.
|
||||
CREATE TYPE new_ai_provider_type AS ENUM (
|
||||
'openai',
|
||||
'anthropic',
|
||||
'azure',
|
||||
'bedrock',
|
||||
'google',
|
||||
'openai-compat',
|
||||
'openrouter',
|
||||
'vercel'
|
||||
);
|
||||
|
||||
ALTER TABLE ai_providers
|
||||
ALTER COLUMN type TYPE new_ai_provider_type USING (type::text::new_ai_provider_type);
|
||||
|
||||
DROP TYPE ai_provider_type;
|
||||
|
||||
ALTER TYPE new_ai_provider_type RENAME TO ai_provider_type;
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -1502,6 +1503,85 @@ func TestMigration000504AIProvidersBackfillOverridesNameConflict(t *testing.T) {
|
||||
require.True(t, fresh.Enabled)
|
||||
}
|
||||
|
||||
// TestMigration000504AIProvidersBackfillEnumInSingleTxn reproduces the
|
||||
// production migration path, where every pending migration runs inside a
|
||||
// single transaction (see pgTxnDriver). Migration 000499 widens
|
||||
// ai_provider_type with ALTER TYPE ... ADD VALUE, and 000504 casts existing
|
||||
// chat_providers rows to that enum. Postgres forbids using an enum value
|
||||
// added by ADD VALUE within the same transaction, so when a legacy provider
|
||||
// uses one of the new values (for example openai-compat) the batch fails with
|
||||
// "unsafe use of new value". The per-step Stepper used by the other tests
|
||||
// commits each migration separately and cannot surface this.
|
||||
func TestMigration000504AIProvidersBackfillEnumInSingleTxn(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
sqlDB := testSQLDB(t)
|
||||
ctx := testutil.Context(t, testutil.WaitSuperLong)
|
||||
|
||||
// Apply everything through 498 and commit, so chat_providers exists and is
|
||||
// populated before the batch under test runs, matching a deployment that
|
||||
// ran an earlier migration batch before this one.
|
||||
applyMigrationsInTxn(ctx, t, sqlDB, 1, 498)
|
||||
|
||||
now := time.Now().UTC().Truncate(time.Microsecond)
|
||||
providerID := uuid.New()
|
||||
|
||||
// A legacy provider whose type is one of the values added in 000499.
|
||||
_, err := sqlDB.ExecContext(ctx, `
|
||||
INSERT INTO chat_providers (id, provider, display_name, api_key, enabled, base_url, created_at, updated_at)
|
||||
VALUES ($1, 'openai-compat', 'OpenAI Compatible', '', TRUE, 'https://api.example.com/v1', $2, $2)
|
||||
`, providerID, now)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Apply 000499 through 000504 in a single transaction, as production does.
|
||||
applyMigrationsInTxn(ctx, t, sqlDB, 499, 504)
|
||||
|
||||
var typ string
|
||||
err = sqlDB.QueryRowContext(ctx,
|
||||
`SELECT type FROM ai_providers WHERE id = $1`, providerID,
|
||||
).Scan(&typ)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "openai-compat", typ)
|
||||
}
|
||||
|
||||
// applyMigrationsInTxn executes the up SQL for every migration whose version is
|
||||
// in [from, to] inside a single transaction, mirroring pgTxnDriver. The whole
|
||||
// batch commits or rolls back together.
|
||||
func applyMigrationsInTxn(ctx context.Context, t *testing.T, sqlDB *sql.DB, from, to int) {
|
||||
t.Helper()
|
||||
|
||||
entries, err := os.ReadDir(".")
|
||||
require.NoError(t, err)
|
||||
|
||||
var files []string
|
||||
for _, entry := range entries {
|
||||
name := entry.Name()
|
||||
if !strings.HasSuffix(name, ".up.sql") {
|
||||
continue
|
||||
}
|
||||
var version int
|
||||
if _, err := fmt.Sscanf(name, "%06d_", &version); err != nil {
|
||||
continue
|
||||
}
|
||||
if version >= from && version <= to {
|
||||
files = append(files, name)
|
||||
}
|
||||
}
|
||||
slices.Sort(files)
|
||||
|
||||
tx, err := sqlDB.BeginTx(ctx, nil)
|
||||
require.NoError(t, err)
|
||||
defer tx.Rollback()
|
||||
|
||||
for _, name := range files {
|
||||
query, err := os.ReadFile(name)
|
||||
require.NoError(t, err)
|
||||
_, err = tx.ExecContext(ctx, string(query))
|
||||
require.NoErrorf(t, err, "apply migration %s", name)
|
||||
}
|
||||
require.NoError(t, tx.Commit())
|
||||
}
|
||||
|
||||
func TestMigration000498SoftDeleteStaleWorkspaceAgents(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user