chore: acquire lock for individual workspace transition (#15859)

When Coder is ran in High Availability mode, each Coder instance has a
lifecycle executor. These lifecycle executors are all trying to do the
same work, and whilst transactions saves us from this causing an issue,
we are still doing extra work that could be prevented.

This PR adds a `TryAcquireLock` call for each attempted workspace
transition, meaning two Coder instances shouldn't duplicate effort.
This commit is contained in:
Danielle Maywood
2024-12-13 16:59:27 +00:00
committed by GitHub
parent d504e0ea66
commit 50ff06cc3c
2 changed files with 82 additions and 1 deletions
+11 -1
View File
@@ -3,6 +3,7 @@ package autobuild
import (
"context"
"database/sql"
"fmt"
"net/http"
"sync"
"sync/atomic"
@@ -177,6 +178,15 @@ func (e *Executor) runOnce(t time.Time) Stats {
err := e.db.InTx(func(tx database.Store) error {
var err error
ok, err := tx.TryAcquireLock(e.ctx, database.GenLockID(fmt.Sprintf("lifecycle-executor:%s", wsID)))
if err != nil {
return xerrors.Errorf("try acquire lifecycle executor lock: %w", err)
}
if !ok {
log.Debug(e.ctx, "unable to acquire lock for workspace, skipping")
return nil
}
// Re-check eligibility since the first check was outside the
// transaction and the workspace settings may have changed.
ws, err = tx.GetWorkspaceByID(e.ctx, wsID)
@@ -389,7 +399,7 @@ func (e *Executor) runOnce(t time.Time) Stats {
}
return nil
}()
if err != nil {
if err != nil && !xerrors.Is(err, context.Canceled) {
log.Error(e.ctx, "failed to transition workspace", slog.Error(err))
statsMu.Lock()
stats.Errors[wsID] = err
@@ -18,6 +18,7 @@ import (
"github.com/coder/coder/v2/coderd/coderdtest"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/notifications"
"github.com/coder/coder/v2/coderd/notifications/notificationstest"
"github.com/coder/coder/v2/coderd/schedule"
@@ -72,6 +73,76 @@ func TestExecutorAutostartOK(t *testing.T) {
require.Equal(t, template.AutostartRequirement.DaysOfWeek, []string{"monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"})
}
func TestMultipleLifecycleExecutors(t *testing.T) {
t.Parallel()
db, ps := dbtestutil.NewDB(t)
var (
sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *")
// Create our first client
tickCh = make(chan time.Time, 2)
statsChA = make(chan autobuild.Stats)
clientA = coderdtest.New(t, &coderdtest.Options{
IncludeProvisionerDaemon: true,
AutobuildTicker: tickCh,
AutobuildStats: statsChA,
Database: db,
Pubsub: ps,
})
// ... And then our second client
statsChB = make(chan autobuild.Stats)
_ = coderdtest.New(t, &coderdtest.Options{
IncludeProvisionerDaemon: true,
AutobuildTicker: tickCh,
AutobuildStats: statsChB,
Database: db,
Pubsub: ps,
})
// Now create a workspace (we can use either client, it doesn't matter)
workspace = mustProvisionWorkspace(t, clientA, func(cwr *codersdk.CreateWorkspaceRequest) {
cwr.AutostartSchedule = ptr.Ref(sched.String())
})
)
// Have the workspace stopped so we can perform an autostart
workspace = coderdtest.MustTransitionWorkspace(t, clientA, workspace.ID, database.WorkspaceTransitionStart, database.WorkspaceTransitionStop)
// Get both clients to perform a lifecycle execution tick
next := sched.Next(workspace.LatestBuild.CreatedAt)
startCh := make(chan struct{})
go func() {
<-startCh
tickCh <- next
}()
go func() {
<-startCh
tickCh <- next
}()
close(startCh)
// Now we want to check the stats for both clients
statsA := <-statsChA
statsB := <-statsChB
// We expect there to be no errors
assert.Len(t, statsA.Errors, 0)
assert.Len(t, statsB.Errors, 0)
// We also expect there to have been only one transition
require.Equal(t, 1, len(statsA.Transitions)+len(statsB.Transitions))
stats := statsA
if len(statsB.Transitions) == 1 {
stats = statsB
}
// And we expect this transition to have been a start transition
assert.Contains(t, stats.Transitions, workspace.ID)
assert.Equal(t, database.WorkspaceTransitionStart, stats.Transitions[workspace.ID])
}
func TestExecutorAutostartTemplateUpdated(t *testing.T) {
t.Parallel()