fix: fix flake in TestExecutorAutostartSkipsWhenNoProvisionersAvailable (#19478)

The flake here had two causes:
1. the use of `time.Now()` in `MustWaitForProvisionersAvailable`
and
2. the fact that `UpdateProvisionerLastSeenAt` cannot set a time that is
further in the past than the current `LastSeenAt` value

Previously the test here was calling
`coderdtest.MustWaitForProvisionersAvailable` which was using `time.Now`
rather than the next tick time like the real `hasProvisionersAvailable`
function does. Additionally, when using `UpdateProvisionerLastSeenAt`
the underlying db query enforces that the time we're trying to set
`LastSeenAt` to cannot be older than the current value.

I was able to reliably reproduce the flake by executing both the
`UpdateProvisionerLastSeenAt` call and `tickCh <- next` in their own
goroutines, the former with a small sleep to reliably ensure we'd
trigger the autobuild before we set the `LastSeenAt` time. That's when I
also noticed that `coderdtest.MustWaitForProvisionersAvailable` was
using `time.Now` instead of the tick time. When I updated that function
to take in a tick time + added a 2nd call to
`UpdateProvisionerLastSeenAt` to set an original non-stale time, we
could then never get the test to pass because the later call to set the
stale time would not actually modify `LastSeenAt`. On top of that,
calling the provisioner daemon's closer in the middle of the function
doesn't really do anything of value in this test.

**The fix for the flake is to keep the goroutines (so that the test
would still flake if the underlying problem were not fixed) and to
explicitly wait for the provisioner to be stale before passing the time
to `tickCh`.**

---------

Signed-off-by: Callum Styan <callumstyan@gmail.com>
This commit is contained in:
Callum Styan
2025-08-28 12:07:50 -07:00
committed by GitHub
parent 95dccf3424
commit 321c2b8fce
3 changed files with 63 additions and 20 deletions
+36 -8
View File
@@ -1649,19 +1649,48 @@ func UpdateProvisionerLastSeenAt(t *testing.T, db database.Store, id uuid.UUID,
func MustWaitForAnyProvisioner(t *testing.T, db database.Store) {
t.Helper()
ctx := ctxWithProvisionerPermissions(testutil.Context(t, testutil.WaitShort))
require.Eventually(t, func() bool {
// testutil.Eventually(t, func)
testutil.Eventually(ctx, t, func(ctx context.Context) (done bool) {
daemons, err := db.GetProvisionerDaemons(ctx)
return err == nil && len(daemons) > 0
}, testutil.WaitShort, testutil.IntervalFast)
}, testutil.IntervalFast, "no provisioner daemons found")
}
// MustWaitForProvisionersUnavailable polls the database until no provisioner
// daemon matching the workspace's organization and the given tags counts as
// active at checkTime.
//
// A daemon counts as active when its LastSeenAt is valid and lies within
// provisionerdserver.StaleInterval of checkTime; daemons without a valid
// LastSeenAt are ignored. Query errors are treated as "not done yet" and the
// poll is retried. The wait is bounded by a testutil.WaitMedium context.
//
// NOTE(review): presumably testutil.Eventually fails the test with the given
// message when the context expires first; confirm against testutil.
func MustWaitForProvisionersUnavailable(t *testing.T, db database.Store, workspace codersdk.Workspace, tags map[string]string, checkTime time.Time) {
	t.Helper()

	waitCtx := ctxWithProvisionerPermissions(testutil.Context(t, testutil.WaitMedium))

	// The lookup arguments never change between polls, so build them once.
	lookup := database.GetProvisionerDaemonsByOrganizationParams{
		OrganizationID: workspace.OrganizationID,
		WantTags:       tags,
	}

	testutil.Eventually(waitCtx, t, func(ctx context.Context) (done bool) {
		daemons, err := db.GetProvisionerDaemonsByOrganization(ctx, lookup)
		if err != nil {
			return false
		}

		for _, daemon := range daemons {
			if !daemon.LastSeenAt.Valid {
				// Never seen: cannot count as active.
				continue
			}
			// Staleness is measured against the caller-supplied checkTime,
			// not the wall clock.
			if checkTime.Sub(daemon.LastSeenAt.Time) <= provisionerdserver.StaleInterval {
				// At least one daemon is still fresh; keep waiting.
				return false
			}
		}

		// Every matching daemon is stale or was never seen.
		return true
	}, testutil.IntervalFast, "there are still provisioners available for workspace, expected none")
}
// MustWaitForProvisionersAvailable waits for provisioners to be available for a specific workspace.
func MustWaitForProvisionersAvailable(t *testing.T, db database.Store, workspace codersdk.Workspace) uuid.UUID {
func MustWaitForProvisionersAvailable(t *testing.T, db database.Store, workspace codersdk.Workspace, ts time.Time) uuid.UUID {
t.Helper()
ctx := ctxWithProvisionerPermissions(testutil.Context(t, testutil.WaitShort))
ctx := ctxWithProvisionerPermissions(testutil.Context(t, testutil.WaitLong))
id := uuid.UUID{}
// Get the workspace from the database
require.Eventually(t, func() bool {
testutil.Eventually(ctx, t, func(ctx context.Context) (done bool) {
ws, err := db.GetWorkspaceByID(ctx, workspace.ID)
if err != nil {
return false
@@ -1689,10 +1718,9 @@ func MustWaitForProvisionersAvailable(t *testing.T, db database.Store, workspace
}
// Check if any provisioners are active (not stale)
now := time.Now()
for _, pd := range provisionerDaemons {
if pd.LastSeenAt.Valid {
age := now.Sub(pd.LastSeenAt.Time)
age := ts.Sub(pd.LastSeenAt.Time)
if age <= provisionerdserver.StaleInterval {
id = pd.ID
return true // Found an active provisioner
@@ -1700,7 +1728,7 @@ func MustWaitForProvisionersAvailable(t *testing.T, db database.Store, workspace
}
}
return false // No active provisioners found
}, testutil.WaitLong, testutil.IntervalFast)
}, testutil.IntervalFast, "no active provisioners available for workspace, expected at least one (non-stale)")
return id
}