mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
chore: acquire lock for individual workspace transition (#15859)
When Coder is ran in High Availability mode, each Coder instance has a lifecycle executor. These lifecycle executors are all trying to do the same work, and whilst transactions saves us from this causing an issue, we are still doing extra work that could be prevented. This PR adds a `TryAcquireLock` call for each attempted workspace transition, meaning two Coder instances shouldn't duplicate effort.
This commit is contained in:
@@ -3,6 +3,7 @@ package autobuild
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
@@ -177,6 +178,15 @@ func (e *Executor) runOnce(t time.Time) Stats {
|
||||
err := e.db.InTx(func(tx database.Store) error {
|
||||
var err error
|
||||
|
||||
ok, err := tx.TryAcquireLock(e.ctx, database.GenLockID(fmt.Sprintf("lifecycle-executor:%s", wsID)))
|
||||
if err != nil {
|
||||
return xerrors.Errorf("try acquire lifecycle executor lock: %w", err)
|
||||
}
|
||||
if !ok {
|
||||
log.Debug(e.ctx, "unable to acquire lock for workspace, skipping")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Re-check eligibility since the first check was outside the
|
||||
// transaction and the workspace settings may have changed.
|
||||
ws, err = tx.GetWorkspaceByID(e.ctx, wsID)
|
||||
@@ -389,7 +399,7 @@ func (e *Executor) runOnce(t time.Time) Stats {
|
||||
}
|
||||
return nil
|
||||
}()
|
||||
if err != nil {
|
||||
if err != nil && !xerrors.Is(err, context.Canceled) {
|
||||
log.Error(e.ctx, "failed to transition workspace", slog.Error(err))
|
||||
statsMu.Lock()
|
||||
stats.Errors[wsID] = err
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"github.com/coder/coder/v2/coderd/coderdtest"
|
||||
"github.com/coder/coder/v2/coderd/database"
|
||||
"github.com/coder/coder/v2/coderd/database/dbauthz"
|
||||
"github.com/coder/coder/v2/coderd/database/dbtestutil"
|
||||
"github.com/coder/coder/v2/coderd/notifications"
|
||||
"github.com/coder/coder/v2/coderd/notifications/notificationstest"
|
||||
"github.com/coder/coder/v2/coderd/schedule"
|
||||
@@ -72,6 +73,76 @@ func TestExecutorAutostartOK(t *testing.T) {
|
||||
require.Equal(t, template.AutostartRequirement.DaysOfWeek, []string{"monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"})
|
||||
}
|
||||
|
||||
func TestMultipleLifecycleExecutors(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
db, ps := dbtestutil.NewDB(t)
|
||||
|
||||
var (
|
||||
sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *")
|
||||
// Create our first client
|
||||
tickCh = make(chan time.Time, 2)
|
||||
statsChA = make(chan autobuild.Stats)
|
||||
clientA = coderdtest.New(t, &coderdtest.Options{
|
||||
IncludeProvisionerDaemon: true,
|
||||
AutobuildTicker: tickCh,
|
||||
AutobuildStats: statsChA,
|
||||
Database: db,
|
||||
Pubsub: ps,
|
||||
})
|
||||
// ... And then our second client
|
||||
statsChB = make(chan autobuild.Stats)
|
||||
_ = coderdtest.New(t, &coderdtest.Options{
|
||||
IncludeProvisionerDaemon: true,
|
||||
AutobuildTicker: tickCh,
|
||||
AutobuildStats: statsChB,
|
||||
Database: db,
|
||||
Pubsub: ps,
|
||||
})
|
||||
// Now create a workspace (we can use either client, it doesn't matter)
|
||||
workspace = mustProvisionWorkspace(t, clientA, func(cwr *codersdk.CreateWorkspaceRequest) {
|
||||
cwr.AutostartSchedule = ptr.Ref(sched.String())
|
||||
})
|
||||
)
|
||||
|
||||
// Have the workspace stopped so we can perform an autostart
|
||||
workspace = coderdtest.MustTransitionWorkspace(t, clientA, workspace.ID, database.WorkspaceTransitionStart, database.WorkspaceTransitionStop)
|
||||
|
||||
// Get both clients to perform a lifecycle execution tick
|
||||
next := sched.Next(workspace.LatestBuild.CreatedAt)
|
||||
|
||||
startCh := make(chan struct{})
|
||||
go func() {
|
||||
<-startCh
|
||||
tickCh <- next
|
||||
}()
|
||||
go func() {
|
||||
<-startCh
|
||||
tickCh <- next
|
||||
}()
|
||||
close(startCh)
|
||||
|
||||
// Now we want to check the stats for both clients
|
||||
statsA := <-statsChA
|
||||
statsB := <-statsChB
|
||||
|
||||
// We expect there to be no errors
|
||||
assert.Len(t, statsA.Errors, 0)
|
||||
assert.Len(t, statsB.Errors, 0)
|
||||
|
||||
// We also expect there to have been only one transition
|
||||
require.Equal(t, 1, len(statsA.Transitions)+len(statsB.Transitions))
|
||||
|
||||
stats := statsA
|
||||
if len(statsB.Transitions) == 1 {
|
||||
stats = statsB
|
||||
}
|
||||
|
||||
// And we expect this transition to have been a start transition
|
||||
assert.Contains(t, stats.Transitions, workspace.ID)
|
||||
assert.Equal(t, database.WorkspaceTransitionStart, stats.Transitions[workspace.ID])
|
||||
}
|
||||
|
||||
func TestExecutorAutostartTemplateUpdated(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user