mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
fix: recreate ai_provider_type instead of ADD VALUE (#25895)
Coder runs all migrations in a single transaction (`pgTxnDriver`). Postgres forbids using an enum value added by `ALTER TYPE ... ADD VALUE` within the same transaction that added it. Migration `000499` widened `ai_provider_type` with `ADD VALUE`, and `000504` casts existing `chat_providers` rows to that enum in the same transaction. On deployments with a legacy provider using one of the new values (for example `openai-compat`), the batch failed with `unsafe use of new value` and the server could not start. Recreate the type (create a new enum, alter the column, drop and rename) instead of using `ADD VALUE`, matching the existing precedent in `000144_user_status_dormant`. A freshly created enum's values are usable immediately in the same transaction, so the cast in `000504` succeeds. The resulting schema is identical, so `make gen` produces no `dump.sql` diff and databases that already applied these migrations see no drift. Added a regression test that seeds an `openai-compat` provider and applies `000499` through `000504` in a single transaction, reproducing the production path. The per-step `Stepper` used by the other migration tests commits each migration separately and cannot surface this class of bug. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Signed-off-by: Danny Kopping <danny@coder.com> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -41,6 +41,41 @@
|
|||||||
dimension.
|
dimension.
|
||||||
- Avoid `ByX` names for grouped queries.
|
- Avoid `ByX` names for grouped queries.
|
||||||
|
|
||||||
|
### Enum Changes Run in a Single Transaction
|
||||||
|
|
||||||
|
All migrations run inside one transaction (`pgTxnDriver`). Postgres forbids
|
||||||
|
*using* an enum value added by `ALTER TYPE ... ADD VALUE` within the same
|
||||||
|
transaction that added it, so it fails with `unsafe use of new value`.
|
||||||
|
|
||||||
|
Adding the value is fine; using it in the same batch is not. "Using it"
|
||||||
|
includes a later migration that casts to it (`col::my_enum`), inserts or
|
||||||
|
updates a row with it, or sets it as a column default. This only fails when a
|
||||||
|
row actually materializes the new value, so fresh databases and CI pass while
|
||||||
|
deployments with existing data break.
|
||||||
|
|
||||||
|
**MUST DO**: If any migration uses a newly added enum value, recreate the type
|
||||||
|
instead of using `ADD VALUE`. A freshly created enum's values are usable
|
||||||
|
immediately in the same transaction. Precedent: `000144_user_status_dormant`.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TYPE new_my_enum AS ENUM ('existing', 'value', 'new_value');
|
||||||
|
|
||||||
|
ALTER TABLE my_table
|
||||||
|
ALTER COLUMN col TYPE new_my_enum USING (col::text::new_my_enum);
|
||||||
|
|
||||||
|
DROP TYPE my_enum;
|
||||||
|
|
||||||
|
ALTER TYPE new_my_enum RENAME TO my_enum;
|
||||||
|
```
|
||||||
|
|
||||||
|
Recreating produces an identical schema, so `make gen` yields no `dump.sql`
|
||||||
|
diff and databases that already applied the migration see no drift.
|
||||||
|
|
||||||
|
**Testing**: `migrations.Stepper` commits each migration separately, so tests
|
||||||
|
built on it cannot surface this. To catch it, seed a row using the new value,
|
||||||
|
then apply the affected migrations in a single transaction (see
|
||||||
|
`TestMigration000504AIProvidersBackfillEnumInSingleTxn`).
|
||||||
|
|
||||||
## Handling Nullable Fields
|
## Handling Nullable Fields
|
||||||
|
|
||||||
Use `sql.NullString`, `sql.NullBool`, etc. for optional database fields:
|
Use `sql.NullString`, `sql.NullBool`, etc. for optional database fields:
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
-- No-op: Postgres does not allow removing enum values safely.
|
-- No-op: the up recreates ai_provider_type with a wider value set, but the
|
||||||
-- Matches the precedent in 000495_ai_providers.down.sql for ALTER
|
-- down does not narrow it back. Narrowing would drop rows that already use the
|
||||||
-- TYPE resource_type / api_key_scope ADD VALUE.
|
-- new values, and 000495_ai_providers.down.sql drops the type wholesale when
|
||||||
|
-- migrating all the way down.
|
||||||
|
|||||||
@@ -7,9 +7,27 @@
|
|||||||
-- OpenAI-compatible endpoints. Native gateway-side support for these
|
-- OpenAI-compatible endpoints. Native gateway-side support for these
|
||||||
-- providers comes later, at which point this enum already carries the
|
-- providers comes later, at which point this enum already carries the
|
||||||
-- right discriminator and no further migration is needed.
|
-- right discriminator and no further migration is needed.
|
||||||
ALTER TYPE ai_provider_type ADD VALUE IF NOT EXISTS 'azure';
|
--
|
||||||
ALTER TYPE ai_provider_type ADD VALUE IF NOT EXISTS 'bedrock';
|
-- Recreate the type rather than using ALTER TYPE ... ADD VALUE. Postgres
|
||||||
ALTER TYPE ai_provider_type ADD VALUE IF NOT EXISTS 'google';
|
-- forbids using a value added by ADD VALUE within the same transaction, and
|
||||||
ALTER TYPE ai_provider_type ADD VALUE IF NOT EXISTS 'openai-compat';
|
-- all migrations run in one transaction. 000504 casts existing chat_providers
|
||||||
ALTER TYPE ai_provider_type ADD VALUE IF NOT EXISTS 'openrouter';
|
-- rows to these new values in that same transaction, so ADD VALUE fails with
|
||||||
ALTER TYPE ai_provider_type ADD VALUE IF NOT EXISTS 'vercel';
|
-- "unsafe use of new value". A freshly created enum's values are usable
|
||||||
|
-- immediately, so the cast in 000504 succeeds.
|
||||||
|
CREATE TYPE new_ai_provider_type AS ENUM (
|
||||||
|
'openai',
|
||||||
|
'anthropic',
|
||||||
|
'azure',
|
||||||
|
'bedrock',
|
||||||
|
'google',
|
||||||
|
'openai-compat',
|
||||||
|
'openrouter',
|
||||||
|
'vercel'
|
||||||
|
);
|
||||||
|
|
||||||
|
ALTER TABLE ai_providers
|
||||||
|
ALTER COLUMN type TYPE new_ai_provider_type USING (type::text::new_ai_provider_type);
|
||||||
|
|
||||||
|
DROP TYPE ai_provider_type;
|
||||||
|
|
||||||
|
ALTER TYPE new_ai_provider_type RENAME TO ai_provider_type;
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"slices"
|
"slices"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
@@ -1502,6 +1503,85 @@ func TestMigration000504AIProvidersBackfillOverridesNameConflict(t *testing.T) {
|
|||||||
require.True(t, fresh.Enabled)
|
require.True(t, fresh.Enabled)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestMigration000504AIProvidersBackfillEnumInSingleTxn reproduces the
|
||||||
|
// production migration path, where every pending migration runs inside a
|
||||||
|
// single transaction (see pgTxnDriver). Migration 000499 widens
|
||||||
|
// ai_provider_type with ALTER TYPE ... ADD VALUE, and 000504 casts existing
|
||||||
|
// chat_providers rows to that enum. Postgres forbids using an enum value
|
||||||
|
// added by ADD VALUE within the same transaction, so when a legacy provider
|
||||||
|
// uses one of the new values (for example openai-compat) the batch fails with
|
||||||
|
// "unsafe use of new value". The per-step Stepper used by the other tests
|
||||||
|
// commits each migration separately and cannot surface this.
|
||||||
|
func TestMigration000504AIProvidersBackfillEnumInSingleTxn(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
sqlDB := testSQLDB(t)
|
||||||
|
ctx := testutil.Context(t, testutil.WaitSuperLong)
|
||||||
|
|
||||||
|
// Apply everything through 498 and commit, so chat_providers exists and is
|
||||||
|
// populated before the batch under test runs, matching a deployment that
|
||||||
|
// ran an earlier migration batch before this one.
|
||||||
|
applyMigrationsInTxn(ctx, t, sqlDB, 1, 498)
|
||||||
|
|
||||||
|
now := time.Now().UTC().Truncate(time.Microsecond)
|
||||||
|
providerID := uuid.New()
|
||||||
|
|
||||||
|
// A legacy provider whose type is one of the values added in 000499.
|
||||||
|
_, err := sqlDB.ExecContext(ctx, `
|
||||||
|
INSERT INTO chat_providers (id, provider, display_name, api_key, enabled, base_url, created_at, updated_at)
|
||||||
|
VALUES ($1, 'openai-compat', 'OpenAI Compatible', '', TRUE, 'https://api.example.com/v1', $2, $2)
|
||||||
|
`, providerID, now)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Apply 000499 through 000504 in a single transaction, as production does.
|
||||||
|
applyMigrationsInTxn(ctx, t, sqlDB, 499, 504)
|
||||||
|
|
||||||
|
var typ string
|
||||||
|
err = sqlDB.QueryRowContext(ctx,
|
||||||
|
`SELECT type FROM ai_providers WHERE id = $1`, providerID,
|
||||||
|
).Scan(&typ)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, "openai-compat", typ)
|
||||||
|
}
|
||||||
|
|
||||||
|
// applyMigrationsInTxn executes the up SQL for every migration whose version is
|
||||||
|
// in [from, to] inside a single transaction, mirroring pgTxnDriver. The whole
|
||||||
|
// batch commits or rolls back together.
|
||||||
|
func applyMigrationsInTxn(ctx context.Context, t *testing.T, sqlDB *sql.DB, from, to int) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
entries, err := os.ReadDir(".")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var files []string
|
||||||
|
for _, entry := range entries {
|
||||||
|
name := entry.Name()
|
||||||
|
if !strings.HasSuffix(name, ".up.sql") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var version int
|
||||||
|
if _, err := fmt.Sscanf(name, "%06d_", &version); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if version >= from && version <= to {
|
||||||
|
files = append(files, name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slices.Sort(files)
|
||||||
|
|
||||||
|
tx, err := sqlDB.BeginTx(ctx, nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer tx.Rollback()
|
||||||
|
|
||||||
|
for _, name := range files {
|
||||||
|
query, err := os.ReadFile(name)
|
||||||
|
require.NoError(t, err)
|
||||||
|
_, err = tx.ExecContext(ctx, string(query))
|
||||||
|
require.NoErrorf(t, err, "apply migration %s", name)
|
||||||
|
}
|
||||||
|
require.NoError(t, tx.Commit())
|
||||||
|
}
|
||||||
|
|
||||||
func TestMigration000498SoftDeleteStaleWorkspaceAgents(t *testing.T) {
|
func TestMigration000498SoftDeleteStaleWorkspaceAgents(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user