mirror of
https://github.com/coder/coder.git
synced 2026-06-03 13:08:25 +00:00
8cc6473736
## Problem `TestMigrate/Parallel` flakes with: ``` timeout: can't acquire database lock ``` ## Root Cause The test runs two concurrent `migrations.Up(db)` calls on the same database. golang-migrate wraps every `Lock()` call with a [15-second timeout](https://github.com/golang-migrate/migrate/blob/v4.19.0/migrate.go#L29) (`DefaultLockTimeout`). Our `pgTxnDriver.Lock()` uses `pg_advisory_xact_lock`, which blocks until the lock is available. With 430+ migrations, the first caller can hold the lock well beyond 15s (the failing test ran for 25.88s), causing the second caller to hit the timeout. ## Fix Set `m.LockTimeout = 2 * time.Minute` after creating the `migrate.Migrate` instance in `setup()`. Since `pg_advisory_xact_lock` releases automatically when the transaction commits, there's no risk of a stuck lock — we just need to wait long enough for a concurrent migration to finish.
284 lines
7.4 KiB
Go
284 lines
7.4 KiB
Go
package migrations
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"database/sql"
|
|
"embed"
|
|
"errors"
|
|
"fmt"
|
|
"io/fs"
|
|
"os"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/golang-migrate/migrate/v4"
|
|
"github.com/golang-migrate/migrate/v4/source"
|
|
"github.com/golang-migrate/migrate/v4/source/iofs"
|
|
"golang.org/x/xerrors"
|
|
)
|
|
|
|
//go:embed *.sql
|
|
var migrations embed.FS
|
|
|
|
var (
|
|
migrationsHash string
|
|
migrationsHashOnce sync.Once
|
|
)
|
|
|
|
// A migrations hash is a sha256 hash of the contents and names
|
|
// of the migrations sorted by filename.
|
|
func calculateMigrationsHash(migrationsFs embed.FS) (string, error) {
|
|
files, err := migrationsFs.ReadDir(".")
|
|
if err != nil {
|
|
return "", xerrors.Errorf("read migrations directory: %w", err)
|
|
}
|
|
sortedFiles := make([]fs.DirEntry, len(files))
|
|
copy(sortedFiles, files)
|
|
sort.Slice(sortedFiles, func(i, j int) bool {
|
|
return sortedFiles[i].Name() < sortedFiles[j].Name()
|
|
})
|
|
|
|
var builder strings.Builder
|
|
for _, file := range sortedFiles {
|
|
if _, err := builder.WriteString(file.Name()); err != nil {
|
|
return "", xerrors.Errorf("write migration file name %q: %w", file.Name(), err)
|
|
}
|
|
content, err := migrationsFs.ReadFile(file.Name())
|
|
if err != nil {
|
|
return "", xerrors.Errorf("read migration file %q: %w", file.Name(), err)
|
|
}
|
|
if _, err := builder.Write(content); err != nil {
|
|
return "", xerrors.Errorf("write migration file content %q: %w", file.Name(), err)
|
|
}
|
|
}
|
|
|
|
hash := sha256.New()
|
|
if _, err := hash.Write([]byte(builder.String())); err != nil {
|
|
return "", xerrors.Errorf("write to hash: %w", err)
|
|
}
|
|
return fmt.Sprintf("%x", hash.Sum(nil)), nil
|
|
}
|
|
|
|
func GetMigrationsHash() string {
|
|
migrationsHashOnce.Do(func() {
|
|
hash, err := calculateMigrationsHash(migrations)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
migrationsHash = hash
|
|
})
|
|
return migrationsHash
|
|
}
|
|
|
|
func setup(db *sql.DB, migs fs.FS) (source.Driver, *migrate.Migrate, error) {
|
|
if migs == nil {
|
|
migs = migrations
|
|
}
|
|
ctx := context.Background()
|
|
sourceDriver, err := iofs.New(migs, ".")
|
|
if err != nil {
|
|
return nil, nil, xerrors.Errorf("create iofs: %w", err)
|
|
}
|
|
|
|
// migration_cursor is a v1 migration table. If this exists, we're on v1.
|
|
// Do no run v2 migrations on a v1 database!
|
|
row := db.QueryRowContext(ctx, "SELECT 1 FROM information_schema.tables WHERE table_schema = current_schema() AND table_name = 'migration_cursor';")
|
|
var v1Exists int
|
|
if row.Scan(&v1Exists) == nil {
|
|
return nil, nil, xerrors.New("currently connected to a Coder v1 database, aborting database setup")
|
|
}
|
|
|
|
dbDriver := &pgTxnDriver{ctx: context.Background(), db: db}
|
|
err = dbDriver.ensureVersionTable()
|
|
if err != nil {
|
|
return nil, nil, xerrors.Errorf("ensure version table: %w", err)
|
|
}
|
|
|
|
m, err := migrate.NewWithInstance("", sourceDriver, "", dbDriver)
|
|
if err != nil {
|
|
return nil, nil, xerrors.Errorf("new migrate instance: %w", err)
|
|
}
|
|
|
|
// The default LockTimeout of 15s is too short for concurrent migrations,
|
|
// especially when the number of migrations is large. Since we use
|
|
// pg_advisory_xact_lock which releases automatically when the transaction
|
|
// ends, we just need to wait long enough for any concurrent migration to
|
|
// finish.
|
|
m.LockTimeout = 2 * time.Minute
|
|
|
|
return sourceDriver, m, nil
|
|
}
|
|
|
|
// Up runs SQL migrations to ensure the database schema is up-to-date.
|
|
func Up(db *sql.DB) error {
|
|
return UpWithFS(db, migrations)
|
|
}
|
|
|
|
// UpWithFS runs SQL migrations in the given fs.
|
|
func UpWithFS(db *sql.DB, migs fs.FS) (retErr error) {
|
|
_, m, err := setup(db, migs)
|
|
if err != nil {
|
|
return xerrors.Errorf("migrate setup: %w", err)
|
|
}
|
|
defer func() {
|
|
srcErr, dbErr := m.Close()
|
|
if retErr != nil {
|
|
return
|
|
}
|
|
if dbErr != nil {
|
|
retErr = dbErr
|
|
return
|
|
}
|
|
retErr = srcErr
|
|
}()
|
|
|
|
err = m.Up()
|
|
if err != nil {
|
|
if errors.Is(err, migrate.ErrNoChange) {
|
|
// It's OK if no changes happened!
|
|
return nil
|
|
}
|
|
|
|
return xerrors.Errorf("up: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Down runs all down SQL migrations.
|
|
func Down(db *sql.DB) error {
|
|
_, m, err := setup(db, migrations)
|
|
if err != nil {
|
|
return xerrors.Errorf("migrate setup: %w", err)
|
|
}
|
|
|
|
err = m.Down()
|
|
if err != nil {
|
|
if errors.Is(err, migrate.ErrNoChange) {
|
|
// It's OK if no changes happened!
|
|
return nil
|
|
}
|
|
|
|
return xerrors.Errorf("down: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// EnsureClean checks whether all migrations for the current version have been
|
|
// applied, without making any changes to the database. If not, returns a
|
|
// non-nil error.
|
|
func EnsureClean(db *sql.DB) error {
|
|
sourceDriver, m, err := setup(db, migrations)
|
|
if err != nil {
|
|
return xerrors.Errorf("migrate setup: %w", err)
|
|
}
|
|
|
|
version, dirty, err := m.Version()
|
|
if err != nil {
|
|
return xerrors.Errorf("get migration version: %w", err)
|
|
}
|
|
|
|
if dirty {
|
|
return xerrors.Errorf("database has not been cleanly migrated")
|
|
}
|
|
|
|
// Verify that the database's migration version is "current" by checking
|
|
// that a migration with that version exists, but there is no next version.
|
|
err = CheckLatestVersion(sourceDriver, version)
|
|
if err != nil {
|
|
return xerrors.Errorf("database needs migration: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Returns nil if currentVersion corresponds to the latest available migration,
|
|
// otherwise an error explaining why not.
|
|
func CheckLatestVersion(sourceDriver source.Driver, currentVersion uint) error {
|
|
// This is ugly, but seems like the only way to do it with the public
|
|
// interfaces provided by golang-migrate.
|
|
|
|
// Check that there is no later version
|
|
nextVersion, err := sourceDriver.Next(currentVersion)
|
|
if err == nil {
|
|
return xerrors.Errorf("current version is %d, but later version %d exists", currentVersion, nextVersion)
|
|
}
|
|
if !errors.Is(err, os.ErrNotExist) {
|
|
return xerrors.Errorf("get next migration after %d: %w", currentVersion, err)
|
|
}
|
|
|
|
// Once we reach this point, we know that either currentVersion doesn't
|
|
// exist, or it has no successor (the return value from
|
|
// sourceDriver.Next() is the same in either case). So we need to check
|
|
// that either it's the first version, or it has a predecessor.
|
|
|
|
firstVersion, err := sourceDriver.First()
|
|
if err != nil {
|
|
// the total number of migrations should be non-zero, so this must be
|
|
// an actual error, not just a missing file
|
|
return xerrors.Errorf("get first migration: %w", err)
|
|
}
|
|
if firstVersion == currentVersion {
|
|
return nil
|
|
}
|
|
|
|
_, err = sourceDriver.Prev(currentVersion)
|
|
if err != nil {
|
|
return xerrors.Errorf("get previous migration: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Stepper returns a function that runs SQL migrations one step at a time.
|
|
//
|
|
// Stepper cannot be closed pre-emptively, it must be run to completion
|
|
// (or until an error is encountered).
|
|
func Stepper(db *sql.DB) (next func() (version uint, more bool, err error), err error) {
|
|
_, m, err := setup(db, migrations)
|
|
if err != nil {
|
|
return nil, xerrors.Errorf("migrate setup: %w", err)
|
|
}
|
|
|
|
return func() (version uint, more bool, err error) {
|
|
defer func() {
|
|
if !more {
|
|
srcErr, dbErr := m.Close()
|
|
if err != nil {
|
|
return
|
|
}
|
|
if dbErr != nil {
|
|
err = dbErr
|
|
return
|
|
}
|
|
err = srcErr
|
|
}
|
|
}()
|
|
|
|
err = m.Steps(1)
|
|
if err != nil {
|
|
switch {
|
|
case errors.Is(err, migrate.ErrNoChange):
|
|
// It's OK if no changes happened!
|
|
return 0, false, nil
|
|
case errors.Is(err, fs.ErrNotExist):
|
|
// This error is encountered at the of Steps when
|
|
// reading from embed.FS.
|
|
return 0, false, nil
|
|
}
|
|
|
|
return 0, false, xerrors.Errorf("Step: %w", err)
|
|
}
|
|
|
|
v, _, err := m.Version()
|
|
if err != nil {
|
|
return 0, false, err
|
|
}
|
|
|
|
return v, true, nil
|
|
}, nil
|
|
}
|