Files
coder/enterprise/coderd/prebuilds/reconcile_test.go
T
Callum Styan 5f3be6b288 feat: add provisioner job queue wait time histogram and jobs enqueued counter (#21869)
This PR adds some metrics to help identify job enqueue rates and
latencies. This work was initiated as a way to help reduce the cost of
the observation/measurement itself for autostart scaletests, which
impacts our ability to identify/reason about the load caused by
autostart. See: https://github.com/coder/internal/issues/1209

I've extended the metrics here to account for regular user initiated
builds, prebuilds, autostarts, etc. IMO there is still the question here
of whether we want to include or need the `transition` label, which is
only present on workspace builds. Including it does lead to an increase
in cardinality, and in the case of the histogram (when not using native
histograms) that's at least a few extra series for every bucket. We
could remove the transition label there but keep it on the counter.

Additionally, the histogram is currently observing latencies for other
jobs, such as template builds/version imports, those do not have a
transition type associated with them.

Tested briefly in a workspace, can see metric values like the following:
-
`coderd_workspace_builds_enqueued_total{build_reason="autostart",provisioner_type="terraform",status="success",transition="start"}
1`
-
`coderd_provisioner_job_queue_wait_seconds_bucket{build_reason="autostart",job_type="workspace_build",provisioner_type="terraform",transition="start",le="0.025"}
1`

---------

Signed-off-by: Callum Styan <callumstyan@gmail.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-12 13:40:47 -08:00

3575 lines
122 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package prebuilds_test
import (
"context"
"database/sql"
"fmt"
"sort"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/otel/trace/noop"
"golang.org/x/xerrors"
"tailscale.com/types/ptr"
"cdr.dev/slog/v3"
"cdr.dev/slog/v3/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/coderdtest"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbfake"
"github.com/coder/coder/v2/coderd/database/dbgen"
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/coderd/database/pubsub"
"github.com/coder/coder/v2/coderd/files"
"github.com/coder/coder/v2/coderd/notifications"
"github.com/coder/coder/v2/coderd/notifications/notificationstest"
"github.com/coder/coder/v2/coderd/rbac"
"github.com/coder/coder/v2/coderd/util/slice"
"github.com/coder/coder/v2/coderd/wsbuilder"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/enterprise/coderd/prebuilds"
sdkproto "github.com/coder/coder/v2/provisionersdk/proto"
"github.com/coder/coder/v2/testutil"
"github.com/coder/quartz"
"github.com/coder/serpent"
)
// TestNoReconciliationActionsIfNoPresets covers the scenario in which a
// template version exists but defines no presets: a reconciliation pass must
// then leave the provisioner job queue empty.
func TestNoReconciliationActionsIfNoPresets(t *testing.T) {
	t.Parallel()

	mockClock := quartz.NewMock(t)
	ctx := testutil.Context(t, testutil.WaitLong)
	store, pubSub := dbtestutil.NewDB(t)
	prebuildsCfg := codersdk.PrebuildsConfig{
		ReconciliationInterval: serpent.Duration(testutil.WaitLong),
	}
	logger := testutil.Logger(t)
	fileCache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
	reconciler := prebuilds.NewStoreReconciler(
		store, pubSub, fileCache, prebuildsCfg, logger,
		quartz.NewMock(t),
		prometheus.NewRegistry(),
		newNoopEnqueuer(),
		newNoopUsageCheckerPtr(),
		noop.NewTracerProvider(),
		10,
		nil,
	)

	// Given: a template version that has no presets attached to it.
	organization := dbgen.Organization(t, store, database.Organization{})
	owner := dbgen.User(t, store, database.User{})
	tpl := dbgen.Template(t, store, database.Template{
		OrganizationID: organization.ID,
		CreatedBy:      owner.ID,
	})
	version := dbgen.TemplateVersion(t, store, database.TemplateVersion{
		OrganizationID: organization.ID,
		CreatedBy:      owner.ID,
		TemplateID:     uuid.NullUUID{UUID: tpl.ID, Valid: true},
	})

	// Sanity check: the stored template version matches what was generated.
	storedVersion, err := store.GetTemplateVersionByID(ctx, version.ID)
	require.NoError(t, err)
	require.Equal(t, version, storedVersion)

	// When: the reconciliation loop is triggered for all templates.
	_, err = reconciler.ReconcileAll(ctx)
	require.NoError(t, err)

	// Then: nothing was enqueued. No presets means no prebuilds, and no
	// prebuilds means there is nothing to reconcile.
	cutoff := mockClock.Now().Add(earlier)
	jobs, err := store.GetProvisionerJobsCreatedAfter(ctx, cutoff)
	require.NoError(t, err)
	require.Empty(t, jobs)
}
// TestNoReconciliationActionsIfNoPrebuilds covers the scenario in which a
// preset (with parameters) exists but no prebuild has been created for it: a
// reconciliation pass must then leave the provisioner job queue empty.
func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) {
	t.Parallel()

	mockClock := quartz.NewMock(t)
	ctx := testutil.Context(t, testutil.WaitLong)
	store, pubSub := dbtestutil.NewDB(t)
	prebuildsCfg := codersdk.PrebuildsConfig{
		ReconciliationInterval: serpent.Duration(testutil.WaitLong),
	}
	logger := testutil.Logger(t)
	fileCache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
	reconciler := prebuilds.NewStoreReconciler(
		store, pubSub, fileCache, prebuildsCfg, logger,
		quartz.NewMock(t),
		prometheus.NewRegistry(),
		newNoopEnqueuer(),
		newNoopUsageCheckerPtr(),
		noop.NewTracerProvider(),
		10,
		nil,
	)

	// Given: a template version with a preset, but without any prebuilds.
	organization := dbgen.Organization(t, store, database.Organization{})
	owner := dbgen.User(t, store, database.User{})
	tpl := dbgen.Template(t, store, database.Template{
		OrganizationID: organization.ID,
		CreatedBy:      owner.ID,
	})
	version := dbgen.TemplateVersion(t, store, database.TemplateVersion{
		OrganizationID: organization.ID,
		CreatedBy:      owner.ID,
		TemplateID:     uuid.NullUUID{UUID: tpl.ID, Valid: true},
	})
	insertedPreset, err := store.InsertPreset(ctx, database.InsertPresetParams{
		Name:              "test",
		TemplateVersionID: version.ID,
	})
	require.NoError(t, err)
	_, err = store.InsertPresetParameters(ctx, database.InsertPresetParametersParams{
		TemplateVersionPresetID: insertedPreset.ID,
		Names:                   []string{"test"},
		Values:                  []string{"test"},
	})
	require.NoError(t, err)

	// Sanity check: the preset parameters are visible for the template version.
	storedParams, err := store.GetPresetParametersByTemplateVersionID(ctx, version.ID)
	require.NoError(t, err)
	require.NotEmpty(t, storedParams)

	// When: the reconciliation loop is triggered for all templates.
	_, err = reconciler.ReconcileAll(ctx)
	require.NoError(t, err)

	// Then: nothing was enqueued. Even though presets exist, without
	// prebuilds there is nothing to reconcile.
	cutoff := mockClock.Now().Add(earlier)
	jobs, err := store.GetProvisionerJobsCreatedAfter(ctx, cutoff)
	require.NoError(t, err)
	require.Empty(t, jobs)
}
func TestPrebuildReconciliation(t *testing.T) {
t.Parallel()
testScenarios := []testScenario{
{
name: "never create prebuilds for inactive template versions",
prebuildLatestTransitions: allTransitions,
prebuildJobStatuses: allJobStatuses,
templateVersionActive: []bool{false},
shouldCreateNewPrebuild: ptr.To(false),
templateDeleted: []bool{false},
},
{
name: "no need to create a new prebuild if one is already running",
prebuildLatestTransitions: []database.WorkspaceTransition{
database.WorkspaceTransitionStart,
},
prebuildJobStatuses: []database.ProvisionerJobStatus{
database.ProvisionerJobStatusSucceeded,
},
templateVersionActive: []bool{true},
templateDeleted: []bool{false},
shouldCreateNewPrebuild: ptr.To(false),
},
{
name: "don't create a new prebuild if one is queued to build or already building",
prebuildLatestTransitions: []database.WorkspaceTransition{
database.WorkspaceTransitionStart,
},
prebuildJobStatuses: []database.ProvisionerJobStatus{
database.ProvisionerJobStatusPending,
database.ProvisionerJobStatusRunning,
},
templateVersionActive: []bool{true},
shouldCreateNewPrebuild: ptr.To(false),
templateDeleted: []bool{false},
},
{
name: "create a new prebuild if one is in a state that disqualifies it from ever being claimed",
prebuildLatestTransitions: []database.WorkspaceTransition{
database.WorkspaceTransitionStop,
database.WorkspaceTransitionDelete,
},
prebuildJobStatuses: []database.ProvisionerJobStatus{
database.ProvisionerJobStatusPending,
database.ProvisionerJobStatusRunning,
database.ProvisionerJobStatusCanceling,
database.ProvisionerJobStatusSucceeded,
},
templateVersionActive: []bool{true},
shouldCreateNewPrebuild: ptr.To(true),
templateDeleted: []bool{false},
},
{
// See TestFailedBuildBackoff for the start/failed case.
name: "create a new prebuild if one is in any kind of exceptional state",
prebuildLatestTransitions: []database.WorkspaceTransition{
database.WorkspaceTransitionStop,
database.WorkspaceTransitionDelete,
},
prebuildJobStatuses: []database.ProvisionerJobStatus{
database.ProvisionerJobStatusCanceled,
},
templateVersionActive: []bool{true},
shouldCreateNewPrebuild: ptr.To(true),
templateDeleted: []bool{false},
},
{
// TODO(ssncferreira): Investigate why the GetRunningPrebuiltWorkspaces query is returning 0 rows.
// When a template version is inactive (templateVersionActive = false), any prebuilds in the
// database.ProvisionerJobStatusRunning state should be deleted.
name: "never attempt to interfere with prebuilds from an active template version",
// The workspace builder does not allow scheduling a new build if there is already a build
// pending, running, or canceling. As such, we should never attempt to start, stop or delete
// such prebuilds. Rather, we should wait for the existing build to complete and reconcile
// again in the next cycle.
prebuildLatestTransitions: allTransitions,
prebuildJobStatuses: []database.ProvisionerJobStatus{
database.ProvisionerJobStatusPending,
database.ProvisionerJobStatusRunning,
database.ProvisionerJobStatusCanceling,
},
templateVersionActive: []bool{true},
shouldDeleteOldPrebuild: ptr.To(false),
templateDeleted: []bool{false},
},
{
name: "never delete prebuilds in an exceptional state",
// We don't want to destroy evidence that might be useful to operators
// when troubleshooting issues. So we leave these prebuilds in place.
// Operators are expected to manually delete these prebuilds.
prebuildLatestTransitions: allTransitions,
prebuildJobStatuses: []database.ProvisionerJobStatus{
database.ProvisionerJobStatusCanceled,
database.ProvisionerJobStatusFailed,
},
templateVersionActive: []bool{true, false},
shouldDeleteOldPrebuild: ptr.To(false),
templateDeleted: []bool{false},
},
{
name: "delete running prebuilds for inactive template versions",
// We only support prebuilds for active template versions.
// If a template version is inactive, we should delete any prebuilds
// that are running.
prebuildLatestTransitions: []database.WorkspaceTransition{
database.WorkspaceTransitionStart,
},
prebuildJobStatuses: []database.ProvisionerJobStatus{
database.ProvisionerJobStatusSucceeded,
},
templateVersionActive: []bool{false},
shouldDeleteOldPrebuild: ptr.To(true),
templateDeleted: []bool{false},
},
{
name: "don't delete running prebuilds for active template versions",
prebuildLatestTransitions: []database.WorkspaceTransition{
database.WorkspaceTransitionStart,
},
prebuildJobStatuses: []database.ProvisionerJobStatus{
database.ProvisionerJobStatusSucceeded,
},
templateVersionActive: []bool{true},
shouldDeleteOldPrebuild: ptr.To(false),
templateDeleted: []bool{false},
},
{
name: "don't delete stopped or already deleted prebuilds",
// We don't ever stop prebuilds. A stopped prebuild is an exceptional state.
// As such we keep it, to allow operators to investigate the cause.
prebuildLatestTransitions: []database.WorkspaceTransition{
database.WorkspaceTransitionStop,
database.WorkspaceTransitionDelete,
},
prebuildJobStatuses: []database.ProvisionerJobStatus{
database.ProvisionerJobStatusSucceeded,
},
templateVersionActive: []bool{true, false},
shouldDeleteOldPrebuild: ptr.To(false),
templateDeleted: []bool{false},
},
{
// Templates can be soft-deleted (`deleted=true`) or hard-deleted (row is removed).
// On the former there is *no* DB constraint to prevent soft deletion, so we have to ensure that if somehow
// the template was soft-deleted any running prebuilds will be removed.
// On the latter there is a DB constraint to prevent row deletion if any workspaces reference the deleting template.
name: "soft-deleted templates MAY have prebuilds",
prebuildLatestTransitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart},
prebuildJobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded},
templateVersionActive: []bool{true, false},
shouldCreateNewPrebuild: ptr.To(false),
shouldDeleteOldPrebuild: ptr.To(true),
templateDeleted: []bool{true},
},
}
for _, tc := range testScenarios {
testCases := tc.testCases()
for _, tc := range testCases {
tc.run(t)
}
}
}
// testScenario is a collection of test cases that illustrate the same business rule.
// A testScenario describes a set of test properties for which the same test expectations
// hold. A testScenario may be decomposed into multiple testCase structs, which can then be run.
type testScenario struct {
// name is copied to every derived testCase, which uses it as its subtest name.
name string
// The four slices below are the property values to combine: testCases emits
// one testCase for every combination of their elements.
prebuildLatestTransitions []database.WorkspaceTransition
prebuildJobStatuses []database.ProvisionerJobStatus
templateVersionActive []bool
templateDeleted []bool
// Expectations shared by every derived testCase. For the two should* fields
// a nil pointer makes testCase.run skip the corresponding assertion.
shouldCreateNewPrebuild *bool
shouldDeleteOldPrebuild *bool
// The two expect* fields are copied into each testCase as-is.
expectOrgMembership *bool
expectGroupMembership *bool
}
// testCases expands the scenario into one testCase per combination of its
// property values. Every combination is emitted twice: first with a broken
// pubsub and then with a working one. The scenario's name and expectations
// are copied unchanged into each case.
func (ts testScenario) testCases() []testCase {
	brokenPubsubModes := []bool{true, false}

	combinations := len(ts.templateVersionActive) *
		len(ts.prebuildLatestTransitions) *
		len(ts.prebuildJobStatuses) *
		len(ts.templateDeleted) *
		len(brokenPubsubModes)
	expanded := make([]testCase, 0, combinations)

	for _, versionActive := range ts.templateVersionActive {
		for _, latestTransition := range ts.prebuildLatestTransitions {
			for _, jobStatus := range ts.prebuildJobStatuses {
				for _, deleted := range ts.templateDeleted {
					for _, brokenPubsub := range brokenPubsubModes {
						expanded = append(expanded, testCase{
							name:                     ts.name,
							prebuildLatestTransition: latestTransition,
							prebuildJobStatus:        jobStatus,
							templateVersionActive:    versionActive,
							templateDeleted:          deleted,
							useBrokenPubsub:          brokenPubsub,
							shouldCreateNewPrebuild:  ts.shouldCreateNewPrebuild,
							shouldDeleteOldPrebuild:  ts.shouldDeleteOldPrebuild,
							expectOrgMembership:      ts.expectOrgMembership,
							expectGroupMembership:    ts.expectGroupMembership,
						})
					}
				}
			}
		}
	}
	return expanded
}
// testCase is a single, concrete combination of the properties described by a
// testScenario, together with that scenario's expectations.
type testCase struct {
// name is used as the subtest name; cases derived from the same scenario share it.
name string
// Transition and provisioner job status given to the prebuild that run sets up.
prebuildLatestTransition database.WorkspaceTransition
prebuildJobStatus database.ProvisionerJobStatus
// When false, run creates an additional template version after the prebuild is set up.
templateVersionActive bool
// Passed to setupTestDBTemplate when the template is created.
templateDeleted bool
// When true, run wraps the pubsub in a brokenPublisher before building the reconciler.
useBrokenPubsub bool
// Expectations checked by run; a nil pointer skips the corresponding assertion.
shouldCreateNewPrebuild *bool
shouldDeleteOldPrebuild *bool
// Copied from the scenario as-is.
expectOrgMembership *bool
expectGroupMembership *bool
}
// run executes the test case as a parallel subtest named tc.name.
//
// It sets up a template, a template version, a preset that desires one
// prebuild, and one prebuild whose latest build has the configured transition
// and provisioner job status. It then calls ReconcileAll repeatedly and, after
// every pass, checks the expectations that are set on the test case:
//
//   - shouldCreateNewPrebuild: whether exactly one workspace other than the
//     original prebuild exists for the template.
//   - shouldDeleteOldPrebuild: whether a delete build was added to the
//     original prebuild.
//
// A nil expectation is not checked.
func (tc testCase) run(t *testing.T) {
	t.Run(tc.name, func(t *testing.T) {
		t.Parallel()
		t.Cleanup(func() {
			if !t.Failed() {
				return
			}
			// All cases expanded from one scenario share the same name, so log
			// every property that distinguishes this case from its siblings.
			t.Logf("failed to run test: %s", tc.name)
			t.Logf("templateVersionActive: %t", tc.templateVersionActive)
			t.Logf("prebuildLatestTransition: %s", tc.prebuildLatestTransition)
			t.Logf("prebuildJobStatus: %s", tc.prebuildJobStatus)
			t.Logf("templateDeleted: %t", tc.templateDeleted)
			t.Logf("useBrokenPubsub: %t", tc.useBrokenPubsub)
		})

		clock := quartz.NewMock(t)
		ctx := testutil.Context(t, testutil.WaitShort)
		cfg := codersdk.PrebuildsConfig{}
		logger := slogtest.Make(
			t, &slogtest.Options{IgnoreErrors: true},
		).Leveled(slog.LevelDebug)
		db, pubSub := dbtestutil.NewDB(t)

		ownerID := uuid.New()
		dbgen.User(t, db, database.User{
			ID: ownerID,
		})
		org, template := setupTestDBTemplate(t, db, ownerID, tc.templateDeleted)
		templateVersionID := setupTestDBTemplateVersion(
			ctx,
			t,
			clock,
			db,
			pubSub,
			org.ID,
			ownerID,
			template.ID,
		)
		preset := setupTestDBPreset(
			t,
			db,
			templateVersionID,
			1,
			uuid.New().String(),
		)
		prebuild, _ := setupTestDBPrebuild(
			t,
			clock,
			db,
			pubSub,
			tc.prebuildLatestTransition,
			tc.prebuildJobStatus,
			org.ID,
			preset,
			template.ID,
			templateVersionID,
		)
		setupTestDBPrebuildAntagonists(t, db, pubSub, org)

		if !tc.templateVersionActive {
			// Create a new template version and mark it as active.
			// This marks the template version that we care about as inactive.
			setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
		}

		// Swap in the failing publisher only after setup, so that only the
		// reconciler is affected by it.
		if tc.useBrokenPubsub {
			pubSub = &brokenPublisher{Pubsub: pubSub}
		}
		cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
		controller := prebuilds.NewStoreReconciler(
			db, pubSub, cache, cfg, logger,
			quartz.NewMock(t),
			prometheus.NewRegistry(),
			newNoopEnqueuer(),
			newNoopUsageCheckerPtr(),
			noop.NewTracerProvider(),
			10,
			nil,
		)

		// Run the reconciliation multiple times to ensure idempotency.
		// 8 was arbitrary, but large enough to reasonably trust the result.
		for i := 1; i <= 8; i++ {
			_, err := controller.ReconcileAll(ctx)
			require.NoErrorf(t, err, "failed on iteration %d", i)

			if tc.shouldCreateNewPrebuild != nil {
				newPrebuildCount := 0
				workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
				require.NoError(t, err)
				for _, workspace := range workspaces {
					if workspace.ID != prebuild.ID {
						newPrebuildCount++
					}
				}
				// This test configures a preset that desires one prebuild.
				// In cases where new prebuilds should be created, there should be exactly one.
				require.Equal(t, *tc.shouldCreateNewPrebuild, newPrebuildCount == 1)
			}

			if tc.shouldDeleteOldPrebuild != nil {
				builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
					WorkspaceID: prebuild.ID,
				})
				require.NoError(t, err)
				if *tc.shouldDeleteOldPrebuild {
					// The original build plus the delete build added by the reconciler.
					require.Equal(t, 2, len(builds))
					require.Equal(t, database.WorkspaceTransitionDelete, builds[0].Transition)
				} else {
					// Only the original build; the reconciler left the prebuild alone.
					require.Equal(t, 1, len(builds))
					require.Equal(t, tc.prebuildLatestTransition, builds[0].Transition)
				}
			}
		}
	})
}
// brokenPublisher embeds a Pubsub and overrides Publish so that it always
// fails. It is used to verify that failing Publish() calls do not affect the
// reconciler's behavior, because the published messages are advisory rather
// than essential.
type brokenPublisher struct {
	pubsub.Pubsub
}

// Publish always returns an error after a short pause.
// It explicitly does _not_ check for EventJobPosted
// (coderd/database/provisionerjobs/provisionerjobs.go), since that would
// require too much knowledge of the underlying implementation.
func (*brokenPublisher) Publish(event string, _ []byte) error {
	// Mimic some work being done before failing.
	time.Sleep(testutil.IntervalFast)
	return xerrors.Errorf("failed to publish %q", event)
}
// TestMultiplePresetsPerTemplateVersion sets up two presets on one template
// version. The first already has as many in-progress prebuilds as it desires;
// the second has none. After every reconciliation pass the number of newly
// created workspaces must equal the second preset's desired instance count.
func TestMultiplePresetsPerTemplateVersion(t *testing.T) {
	t.Parallel()

	const templateDeleted = false
	existingTransition := database.WorkspaceTransitionStart
	existingJobStatus := database.ProvisionerJobStatusRunning

	clock := quartz.NewMock(t)
	ctx := testutil.Context(t, testutil.WaitShort)
	cfg := codersdk.PrebuildsConfig{}
	logger := slogtest.Make(
		t, &slogtest.Options{IgnoreErrors: true},
	).Leveled(slog.LevelDebug)
	db, pubSub := dbtestutil.NewDB(t)
	cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
	reconciler := prebuilds.NewStoreReconciler(
		db, pubSub, cache, cfg, logger,
		quartz.NewMock(t),
		prometheus.NewRegistry(),
		newNoopEnqueuer(),
		newNoopUsageCheckerPtr(),
		noop.NewTracerProvider(),
		10,
		nil,
	)

	ownerID := uuid.New()
	dbgen.User(t, db, database.User{ID: ownerID})
	org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
	templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
	satisfiedPreset := setupTestDBPreset(t, db, templateVersionID, 4, uuid.New().String())
	emptyPreset := setupTestDBPreset(t, db, templateVersionID, 10, uuid.New().String())

	// Give the first preset all the prebuilds it desires up front.
	desired := int(satisfiedPreset.DesiredInstances.Int32)
	existingIDs := make([]uuid.UUID, 0, desired)
	for len(existingIDs) < desired {
		prebuild, _ := setupTestDBPrebuild(
			t,
			clock,
			db,
			pubSub,
			existingTransition,
			existingJobStatus,
			org.ID,
			satisfiedPreset,
			template.ID,
			templateVersionID,
		)
		existingIDs = append(existingIDs, prebuild.ID)
	}

	// Run the reconciliation multiple times to ensure idempotency.
	// 8 was arbitrary, but large enough to reasonably trust the result.
	for iteration := 1; iteration <= 8; iteration++ {
		_, err := reconciler.ReconcileAll(ctx)
		require.NoErrorf(t, err, "failed on iteration %d", iteration)

		workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
		require.NoError(t, err)
		created := 0
		for _, ws := range workspaces {
			if !slice.Contains(existingIDs, ws.ID) {
				created++
			}
		}

		// NOTE: the first preset doesn't block creation of instances in the second one.
		require.Equal(t, emptyPreset.DesiredInstances.Int32, int32(created)) // nolint:gosec
	}
}
// TestPrebuildScheduling sets up two presets, each with a default of one
// desired instance and two cron-based schedules that override it. For several
// points in time it runs one reconciliation pass and checks how many prebuilds
// were created for each preset.
func TestPrebuildScheduling(t *testing.T) {
	t.Parallel()

	const templateDeleted = false

	testCases := []struct {
		name string
		// now is the time the mock clock is set to.
		now time.Time
		// expectedPrebuildCounts holds the expected number of prebuilds for
		// preset1 and preset2, in that order.
		expectedPrebuildCounts []int
	}{
		{
			name:                   "Before the 1st schedule",
			now:                    mustParseTime(t, time.RFC1123, "Mon, 02 Jun 2025 01:00:00 UTC"),
			expectedPrebuildCounts: []int{1, 1},
		},
		{
			name:                   "1st schedule",
			now:                    mustParseTime(t, time.RFC1123, "Mon, 02 Jun 2025 03:00:00 UTC"),
			expectedPrebuildCounts: []int{2, 1},
		},
		{
			name:                   "2nd schedule",
			now:                    mustParseTime(t, time.RFC1123, "Mon, 02 Jun 2025 07:00:00 UTC"),
			expectedPrebuildCounts: []int{3, 1},
		},
		{
			name:                   "3rd schedule",
			now:                    mustParseTime(t, time.RFC1123, "Mon, 02 Jun 2025 11:00:00 UTC"),
			expectedPrebuildCounts: []int{1, 4},
		},
		{
			name:                   "4th schedule",
			now:                    mustParseTime(t, time.RFC1123, "Mon, 02 Jun 2025 15:00:00 UTC"),
			expectedPrebuildCounts: []int{1, 5},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			clock := quartz.NewMock(t)
			clock.Set(tc.now)
			ctx := testutil.Context(t, testutil.WaitShort)
			cfg := codersdk.PrebuildsConfig{}
			logger := slogtest.Make(
				t, &slogtest.Options{IgnoreErrors: true},
			).Leveled(slog.LevelDebug)
			db, pubSub := dbtestutil.NewDB(t)
			cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
			reconciler := prebuilds.NewStoreReconciler(
				db, pubSub, cache, cfg, logger,
				clock,
				prometheus.NewRegistry(),
				newNoopEnqueuer(),
				newNoopUsageCheckerPtr(),
				noop.NewTracerProvider(),
				10,
				nil,
			)

			ownerID := uuid.New()
			dbgen.User(t, db, database.User{ID: ownerID})
			org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
			templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
			preset1 := setupTestDBPresetWithScheduling(t, db, templateVersionID, 1, uuid.New().String(), "UTC")
			preset2 := setupTestDBPresetWithScheduling(t, db, templateVersionID, 1, uuid.New().String(), "UTC")

			// Two schedules per preset, inserted in this order.
			schedules := []database.InsertPresetPrebuildScheduleParams{
				{PresetID: preset1.ID, CronExpression: "* 2-4 * * 1-5", DesiredInstances: 2},
				{PresetID: preset1.ID, CronExpression: "* 6-8 * * 1-5", DesiredInstances: 3},
				{PresetID: preset2.ID, CronExpression: "* 10-12 * * 1-5", DesiredInstances: 4},
				{PresetID: preset2.ID, CronExpression: "* 14-16 * * 1-5", DesiredInstances: 5},
			}
			for _, schedule := range schedules {
				dbgen.PresetPrebuildSchedule(t, db, schedule)
			}

			_, err := reconciler.ReconcileAll(ctx)
			require.NoError(t, err)

			// Fetch the latest build of every workspace of the template.
			workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			workspaceIDs := make([]uuid.UUID, 0, len(workspaces))
			for _, ws := range workspaces {
				workspaceIDs = append(workspaceIDs, ws.ID)
			}
			latestBuilds, err := db.GetLatestWorkspaceBuildsByWorkspaceIDs(ctx, workspaceIDs)
			require.NoError(t, err)

			// Tally the builds per preset.
			buildsPerPreset := make(map[uuid.UUID]int, 2)
			for _, build := range latestBuilds {
				buildsPerPreset[build.TemplateVersionPresetID.UUID]++
			}

			require.Equal(t, tc.expectedPrebuildCounts[0], buildsPerPreset[preset1.ID])
			require.Equal(t, tc.expectedPrebuildCounts[1], buildsPerPreset[preset2.ID])
		})
	}
}
// TestInvalidPreset adds a required template version parameter that the
// preset does not set, and checks that repeated reconciliation passes succeed
// without ever leaving a workspace behind for the template.
func TestInvalidPreset(t *testing.T) {
	t.Parallel()

	const templateDeleted = false

	clock := quartz.NewMock(t)
	ctx := testutil.Context(t, testutil.WaitShort)
	cfg := codersdk.PrebuildsConfig{}
	logger := slogtest.Make(
		t, &slogtest.Options{IgnoreErrors: true},
	).Leveled(slog.LevelDebug)
	db, pubSub := dbtestutil.NewDB(t)
	cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
	reconciler := prebuilds.NewStoreReconciler(
		db, pubSub, cache, cfg, logger,
		quartz.NewMock(t),
		prometheus.NewRegistry(),
		newNoopEnqueuer(),
		newNoopUsageCheckerPtr(),
		noop.NewTracerProvider(),
		10,
		nil,
	)

	ownerID := uuid.New()
	dbgen.User(t, db, database.User{ID: ownerID})
	org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
	templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)

	// Add a required parameter that the preset does not set, so that creating
	// a prebuild will constantly fail.
	dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
		TemplateVersionID: templateVersionID,
		Name:              "required-param",
		Description:       "required param to make sure creating prebuild will fail",
		Type:              "bool",
		DefaultValue:      "",
		Required:          true,
	})
	setupTestDBPreset(t, db, templateVersionID, 1, uuid.New().String())

	// Run the reconciliation multiple times to ensure idempotency.
	// 8 was arbitrary, but large enough to reasonably trust the result.
	for iteration := 1; iteration <= 8; iteration++ {
		_, err := reconciler.ReconcileAll(ctx)
		require.NoErrorf(t, err, "failed on iteration %d", iteration)

		workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
		require.NoError(t, err)
		// NOTE: we don't have any new prebuilds, because their creation constantly fails.
		require.Empty(t, workspaces)
	}
}
// TestDeletionOfPrebuiltWorkspaceWithInvalidPreset checks that a prebuilt
// workspace belonging to an outdated template version still receives a delete
// build when the new template version has a required parameter that is not
// set in a preset.
func TestDeletionOfPrebuiltWorkspaceWithInvalidPreset(t *testing.T) {
	t.Parallel()

	const templateDeleted = false

	clock := quartz.NewMock(t)
	ctx := testutil.Context(t, testutil.WaitShort)
	cfg := codersdk.PrebuildsConfig{}
	logger := slogtest.Make(
		t, &slogtest.Options{IgnoreErrors: true},
	).Leveled(slog.LevelDebug)
	db, pubSub := dbtestutil.NewDB(t)
	cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
	reconciler := prebuilds.NewStoreReconciler(
		db, pubSub, cache, cfg, logger,
		quartz.NewMock(t),
		prometheus.NewRegistry(),
		newNoopEnqueuer(),
		newNoopUsageCheckerPtr(),
		noop.NewTracerProvider(),
		10,
		nil,
	)

	ownerID := uuid.New()
	dbgen.User(t, db, database.User{ID: ownerID})
	org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
	oldVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
	preset := setupTestDBPreset(t, db, oldVersionID, 1, uuid.New().String())
	oldPrebuild, _ := setupTestDBPrebuild(
		t,
		clock,
		db,
		pubSub,
		database.WorkspaceTransitionStart,
		database.ProvisionerJobStatusSucceeded,
		org.ID,
		preset,
		template.ID,
		oldVersionID,
	)

	// Make sure we start out with exactly one workspace.
	workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
	require.NoError(t, err)
	require.Len(t, workspaces, 1)

	// Create a new template version and mark it as active.
	// This marks the previous template version as inactive.
	newVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)

	// Add a required parameter which is not set in a preset. Creating a new
	// prebuilt workspace will therefore fail, but cleaning up old prebuilt
	// workspaces should still be possible.
	dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
		TemplateVersionID: newVersionID,
		Name:              "required-param",
		Description:       "required param which isn't set in preset",
		Type:              "bool",
		DefaultValue:      "",
		Required:          true,
	})

	// The old prebuilt workspace should be deleted.
	_, err = reconciler.ReconcileAll(ctx)
	require.NoError(t, err)

	builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
		WorkspaceID: oldPrebuild.ID,
	})
	require.NoError(t, err)

	// The old prebuilt workspace got a delete build even though the new
	// template version has a required parameter which isn't set in a preset.
	require.Len(t, builds, 2)
	require.Equal(t, database.WorkspaceTransitionDelete, builds[0].Transition)
}
// TestSkippingHardLimitedPresets verifies how prebuild creation reacts to the
// configured hard failure limit. Starting from one failed prebuild, it checks
// that reaching the limit stops further creation, marks the preset as hard
// limited and sets the hard-limited gauge, while staying below the limit
// results in another workspace and leaves the gauge unset.
func TestSkippingHardLimitedPresets(t *testing.T) {
	t.Parallel()

	testCases := []struct {
		name           string
		hardLimit      int64
		isHardLimitHit bool
	}{
		{
			name:           "hard limit is hit - skip creation of prebuilt workspace",
			hardLimit:      1,
			isHardLimitHit: true,
		},
		{
			name:           "hard limit is not hit - try to create prebuilt workspace again",
			hardLimit:      2,
			isHardLimitHit: false,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			const templateDeleted = false

			clock := quartz.NewMock(t)
			ctx := testutil.Context(t, testutil.WaitShort)
			cfg := codersdk.PrebuildsConfig{
				FailureHardLimit:              serpent.Int64(tc.hardLimit),
				ReconciliationBackoffInterval: 0,
			}
			logger := slogtest.Make(
				t, &slogtest.Options{IgnoreErrors: true},
			).Leveled(slog.LevelDebug)
			db, pubSub := dbtestutil.NewDB(t)
			fakeEnqueuer := newFakeEnqueuer()
			registry := prometheus.NewRegistry()
			cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
			reconciler := prebuilds.NewStoreReconciler(
				db, pubSub, cache, cfg, logger,
				clock,
				registry,
				fakeEnqueuer,
				newNoopUsageCheckerPtr(),
				noop.NewTracerProvider(),
				10,
				nil,
			)

			// Set up the test environment: a template, a version, and a preset.
			ownerID := uuid.New()
			dbgen.User(t, db, database.User{ID: ownerID})
			org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
			templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
			preset := setupTestDBPreset(t, db, templateVersionID, 1, uuid.New().String())

			// metricLabels builds a fresh copy of the label set that
			// identifies this preset's hard-limited gauge.
			metricLabels := func() map[string]string {
				return map[string]string{
					"template_name": template.Name,
					"preset_name":   preset.Name,
					"org_name":      org.Name,
				}
			}

			// Create a failed prebuild workspace that counts toward the hard failure limit.
			setupTestDBPrebuild(
				t,
				clock,
				db,
				pubSub,
				database.WorkspaceTransitionStart,
				database.ProvisionerJobStatusFailed,
				org.ID,
				preset,
				template.ID,
				templateVersionID,
			)

			// Initial state: exactly one (failed) workspace exists.
			workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			require.Len(t, workspaces, 1)

			// Initial state: the metric is not set, meaning the preset is not hard limited.
			require.NoError(t, reconciler.ForceMetricsUpdate(ctx))
			families, err := registry.Gather()
			require.NoError(t, err)
			require.Nil(t, findMetric(families, prebuilds.MetricPresetHardLimitedGauge, metricLabels()))

			// The simulated failed prebuild triggers the backoff mechanism when ReconcileAll is called.
			// Even though ReconciliationBackoffInterval is zero, the clock still has to advance by at
			// least one nanosecond.
			clock.Advance(time.Nanosecond).MustWait(ctx)

			// Trigger reconciliation to attempt creating a new prebuild.
			// The outcome depends on whether the hard limit has been reached.
			_, err = reconciler.ReconcileAll(ctx)
			require.NoError(t, err)

			// These two additional calls to ReconcileAll should not trigger any notifications.
			// A notification is only sent once.
			_, err = reconciler.ReconcileAll(ctx)
			require.NoError(t, err)
			_, err = reconciler.ReconcileAll(ctx)
			require.NoError(t, err)

			// Collect the final state after reconciliation.
			workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			updatedPreset, err := db.GetPresetByID(ctx, preset.ID)
			require.NoError(t, err)

			if tc.isHardLimitHit {
				// Hard limit reached: no new workspace is created.
				require.Len(t, workspaces, 1)
				require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)

				// Hard limit reached: the metric is set to 1.
				families, err = registry.Gather()
				require.NoError(t, err)
				gauge := findMetric(families, prebuilds.MetricPresetHardLimitedGauge, metricLabels())
				require.NotNil(t, gauge)
				require.NotNil(t, gauge.GetGauge())
				require.EqualValues(t, 1, gauge.GetGauge().GetValue())
				return
			}

			// Hard limit not reached: a new workspace is created.
			require.Len(t, workspaces, 2)
			require.Equal(t, database.PrebuildStatusHealthy, updatedPreset.PrebuildStatus)

			// Hard limit not reached: the metric stays unset.
			families, err = registry.Gather()
			require.NoError(t, err)
			require.Nil(t, findMetric(families, prebuilds.MetricPresetHardLimitedGauge, metricLabels()))
		})
	}
}
// TestHardLimitedPresetShouldNotBlockDeletion verifies that a preset which has
// been marked as hard limited still gets its successful prebuilt workspace
// scheduled for deletion once that workspace becomes outdated, either because
// a newer template version was created or because the template was deleted.
func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
	t.Parallel()

	// Each scenario makes the existing prebuilt workspace outdated in a different
	// way after the preset has already been marked as hard limited.
	scenarios := []struct {
		name                     string
		hardLimit                int64
		createNewTemplateVersion bool
		deleteTemplate           bool
	}{
		{
			// The hard limit is hit, but deleting the prebuilt workspace is still
			// allowed because it is outdated (a new template version was created).
			name:                     "new template version is created",
			hardLimit:                1,
			createNewTemplateVersion: true,
			deleteTemplate:           false,
		},
		{
			// The hard limit is hit, but deleting the prebuilt workspace is still
			// allowed because the template itself is deleted.
			name:                     "template is deleted",
			hardLimit:                1,
			createNewTemplateVersion: false,
			deleteTemplate:           true,
		},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			t.Parallel()

			mockClock := quartz.NewMock(t)
			ctx := testutil.Context(t, testutil.WaitShort)
			prebuildsCfg := codersdk.PrebuildsConfig{
				FailureHardLimit:              serpent.Int64(scenario.hardLimit),
				ReconciliationBackoffInterval: 0,
			}
			testLogger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
			db, pubSub := dbtestutil.NewDB(t)
			enqueuer := newFakeEnqueuer()
			registry := prometheus.NewRegistry()
			fileCache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
			reconciler := prebuilds.NewStoreReconciler(
				db, pubSub, fileCache, prebuildsCfg, testLogger,
				mockClock,
				registry,
				enqueuer,
				newNoopUsageCheckerPtr(),
				noop.NewTracerProvider(),
				10,
				nil,
			)

			// Seed a user, a template with one version, and a preset that wants two instances.
			ownerID := uuid.New()
			dbgen.User(t, db, database.User{
				ID: ownerID,
			})
			org, template := setupTestDBTemplate(t, db, ownerID, false)
			templateVersionID := setupTestDBTemplateVersion(ctx, t, mockClock, db, pubSub, org.ID, ownerID, template.ID)
			preset := setupTestDBPreset(t, db, templateVersionID, 2, uuid.New().String())

			// hardLimitedLabels builds the label set identifying this preset's metric series.
			hardLimitedLabels := func() map[string]string {
				return map[string]string{
					"template_name": template.Name,
					"preset_name":   preset.Name,
					"org_name":      org.Name,
				}
			}

			// First prebuilt workspace: its build succeeded.
			successfulWorkspace, _ := setupTestDBPrebuild(
				t,
				mockClock,
				db,
				pubSub,
				database.WorkspaceTransitionStart,
				database.ProvisionerJobStatusSucceeded,
				org.ID,
				preset,
				template.ID,
				templateVersionID,
			)

			// Advance the clock so the prebuilt workspaces are created in the order [successful, failed].
			mockClock.Advance(time.Second).MustWait(ctx)

			// Second prebuilt workspace: its build failed, which counts toward the hard failure limit.
			setupTestDBPrebuild(
				t,
				mockClock,
				db,
				pubSub,
				database.WorkspaceTransitionStart,
				database.ProvisionerJobStatusFailed,
				org.ID,
				preset,
				template.ID,
				templateVersionID,
			)

			// countJobStatuses tallies the provisioner job status of every build
			// belonging to the given workspaces.
			countJobStatuses := func(workspaces []database.WorkspaceTable) map[database.ProvisionerJobStatus]int {
				counts := make(map[database.ProvisionerJobStatus]int)
				for _, ws := range workspaces {
					builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
						WorkspaceID: ws.ID,
					})
					require.NoError(t, err)
					for _, build := range builds {
						job, err := db.GetProvisionerJobByID(ctx, build.JobID)
						require.NoError(t, err)
						counts[job.JobStatus]++
					}
				}
				return counts
			}

			// Initial state: two workspaces, one successful build and one failed build.
			workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			require.Len(t, workspaces, 2)
			statusCounts := countJobStatuses(workspaces)
			require.Len(t, statusCounts, 2)
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusSucceeded])
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusFailed])

			// Initial state: the hard-limited metric is absent, i.e. the preset is not hard limited yet.
			require.NoError(t, reconciler.ForceMetricsUpdate(ctx))
			families, err := registry.Gather()
			require.NoError(t, err)
			require.Nil(t, findMetric(families, prebuilds.MetricPresetHardLimitedGauge, hardLimitedLabels()))

			// A failed prebuild triggers the backoff mechanism when ReconcileAll runs.
			// Even with ReconciliationBackoffInterval set to zero, the clock still has
			// to move forward by at least one nanosecond.
			mockClock.Advance(time.Nanosecond).MustWait(ctx)

			// Reconcile three times in a row; the assertions below must hold after
			// repeated reconciliation.
			for range 3 {
				_, err = reconciler.ReconcileAll(ctx)
				require.NoError(t, err)
			}

			// After reconciliation no additional workspace or build exists.
			workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			require.Len(t, workspaces, 2)
			statusCounts = countJobStatuses(workspaces)
			require.Len(t, statusCounts, 2)
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusSucceeded])
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusFailed])

			// The preset is now marked as hard limited.
			updatedPreset, err := db.GetPresetByID(ctx, preset.ID)
			require.NoError(t, err)
			require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)

			// With the hard limit reached, the metric is set to 1.
			families, err = registry.Gather()
			require.NoError(t, err)
			gaugeMetric := findMetric(families, prebuilds.MetricPresetHardLimitedGauge, hardLimitedLabels())
			require.NotNil(t, gaugeMetric)
			require.NotNil(t, gaugeMetric.GetGauge())
			require.EqualValues(t, 1, gaugeMetric.GetGauge().GetValue())

			if scenario.createNewTemplateVersion {
				// Creating another template version makes it the active one, which
				// leaves the version under test inactive.
				setupTestDBTemplateVersion(ctx, t, mockClock, db, pubSub, org.ID, ownerID, template.ID)
			}

			if scenario.deleteTemplate {
				require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
					ID:        template.ID,
					Deleted:   true,
					UpdatedAt: dbtime.Now(),
				}))
			}

			// Reconcile again: the successful but now outdated prebuilt workspace must be deleted.
			_, err = reconciler.ReconcileAll(ctx)
			require.NoError(t, err)

			workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			require.Len(t, workspaces, 2)
			statusCounts = countJobStatuses(workspaces)
			require.Len(t, statusCounts, 3)
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusSucceeded])
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusFailed])
			// The pending job is the one deleting the successful, outdated prebuilt workspace.
			// The deletion MUST happen even though the preset is marked as hard limited.
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusPending])

			successfulBuilds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
				WorkspaceID: successfulWorkspace.ID,
			})
			require.NoError(t, err)
			require.Len(t, successfulBuilds, 2)
			// The successfully created, but outdated, prebuilt workspace was scheduled for deletion.
			require.Equal(t, database.WorkspaceTransitionDelete, successfulBuilds[0].Transition)
			require.Equal(t, database.WorkspaceTransitionStart, successfulBuilds[1].Transition)

			// The metric is removed once the preset is outdated.
			families, err = registry.Gather()
			require.NoError(t, err)
			require.Nil(t, findMetric(families, prebuilds.MetricPresetHardLimitedGauge, hardLimitedLabels()))
		})
	}
}
// TestRunLoop drives the reconciler's Run loop with a mock clock and verifies
// that each tick of the reconciliation interval creates the prebuilds that the
// presets known at that moment are missing.
func TestRunLoop(t *testing.T) {
	t.Parallel()

	prebuildLatestTransition := database.WorkspaceTransitionStart
	prebuildJobStatus := database.ProvisionerJobStatusRunning
	templateDeleted := false

	clock := quartz.NewMock(t)
	ctx := testutil.Context(t, testutil.WaitShort)
	backoffInterval := time.Minute
	cfg := codersdk.PrebuildsConfig{
		// Given: explicitly defined backoff configuration to validate timings.
		ReconciliationBackoffLookback: serpent.Duration(muchEarlier * -10), // Has to be positive.
		ReconciliationBackoffInterval: serpent.Duration(backoffInterval),
		ReconciliationInterval:        serpent.Duration(time.Second),
	}
	// Do not ignore errors as we want a graceful stop
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
	db, pubSub := dbtestutil.NewDB(t)
	cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
	reconciler := prebuilds.NewStoreReconciler(
		db, pubSub, cache, cfg, logger,
		clock,
		prometheus.NewRegistry(),
		newNoopEnqueuer(),
		newNoopUsageCheckerPtr(),
		noop.NewTracerProvider(),
		10,
		nil,
	)

	ownerID := uuid.New()
	dbgen.User(t, db, database.User{
		ID: ownerID,
	})
	org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
	templateVersionID := setupTestDBTemplateVersion(
		ctx,
		t,
		clock,
		db,
		pubSub,
		org.ID,
		ownerID,
		template.ID,
	)
	preset := setupTestDBPreset(
		t,
		db,
		templateVersionID,
		4,
		uuid.New().String(),
	)
	preset2 := setupTestDBPreset(
		t,
		db,
		templateVersionID,
		10,
		uuid.New().String(),
	)

	// Seed the first preset with as many prebuilds as it desires and remember
	// their IDs so that later counts can exclude them.
	prebuildIDs := make([]uuid.UUID, 0, preset.DesiredInstances.Int32)
	for i := 0; i < int(preset.DesiredInstances.Int32); i++ {
		prebuild, _ := setupTestDBPrebuild(
			t,
			clock,
			db,
			pubSub,
			prebuildLatestTransition,
			prebuildJobStatus,
			org.ID,
			preset,
			template.ID,
			templateVersionID,
		)
		prebuildIDs = append(prebuildIDs, prebuild.ID)
	}

	// countNewPrebuilds returns how many workspaces of the template were not part
	// of the initially seeded prebuilds. It returns the query error instead of
	// failing the test itself, because it is also called from the condition of
	// require.Eventually, which runs on a separate goroutine where FailNow (and
	// therefore require.*) must not be used.
	countNewPrebuilds := func() (int32, error) {
		workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
		if err != nil {
			return 0, err
		}
		var count int32
		for _, workspace := range workspaces {
			if slice.Contains(prebuildIDs, workspace.ID) {
				continue
			}
			count++
		}
		return count, nil
	}

	// we need to wait until ticker is initialized, and only then use clock.Advance()
	// otherwise clock.Advance() will be ignored
	trap := clock.Trap().NewTicker()
	go reconciler.Run(ctx)
	// wait until ticker is initialized
	trap.MustWait(ctx).MustRelease(ctx)

	// start 1st iteration of ReconciliationLoop
	// NOTE: at this point MustWait waits that iteration is started (ReconcileAll is called), but it doesn't wait until it completes
	clock.Advance(cfg.ReconciliationInterval.Value()).MustWait(ctx)

	// wait until ReconcileAll is completed
	// TODO: is it possible to avoid Eventually and replace it with quartz?
	// Ideally to have all control on test-level, and be able to advance loop iterations from the test.
	require.Eventually(t, func() bool {
		newPrebuildCount, err := countNewPrebuilds()
		// assert (not require) is used here: this closure is not running on the test goroutine.
		if !assert.NoError(t, err) {
			return false
		}
		// NOTE: preset1 doesn't block creation of instances in preset2
		return preset2.DesiredInstances.Int32 == newPrebuildCount
	}, testutil.WaitShort, testutil.IntervalFast)

	// setup one more preset with 5 prebuilds
	preset3 := setupTestDBPreset(
		t,
		db,
		templateVersionID,
		5,
		uuid.New().String(),
	)
	newPrebuildCount, err := countNewPrebuilds()
	require.NoError(t, err)
	// nothing changed, because we didn't trigger a new iteration of a loop
	require.Equal(t, preset2.DesiredInstances.Int32, newPrebuildCount)

	// start 2nd iteration of ReconciliationLoop
	// NOTE: at this point MustWait waits that iteration is started (ReconcileAll is called), but it doesn't wait until it completes
	clock.Advance(cfg.ReconciliationInterval.Value()).MustWait(ctx)

	// wait until ReconcileAll is completed
	require.Eventually(t, func() bool {
		newPrebuildCount, err := countNewPrebuilds()
		if !assert.NoError(t, err) {
			return false
		}
		// both prebuilds for preset2 and preset3 were created
		return preset2.DesiredInstances.Int32+preset3.DesiredInstances.Int32 == newPrebuildCount
	}, testutil.WaitShort, testutil.IntervalFast)

	// gracefully stop the reconciliation loop
	reconciler.Stop(ctx, nil)
}
// TestReconcilerLifecycle checks that, after one StoreReconciler has been
// stopped, another one can be constructed against the same Prometheus registry
// and stopped in turn. This mirrors the prebuilds feature being disabled and
// later re-enabled.
func TestReconcilerLifecycle(t *testing.T) {
	t.Parallel()

	ctx := testutil.Context(t, testutil.WaitLong)
	testLogger := testutil.Logger(t)
	db, pubSub := dbtestutil.NewDB(t)
	prebuildsCfg := codersdk.PrebuildsConfig{
		ReconciliationInterval: serpent.Duration(testutil.WaitLong),
	}
	registry := prometheus.NewRegistry()
	fileCache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})

	// Two consecutive lifecycles sharing the same dependencies:
	//  1. Given a reconciler (feature enabled), when it is stopped with a cause
	//     (feature disabled),
	//  2. then a fresh reconciler can be created (feature re-enabled) and is
	//     finally stopped gracefully, i.e. without a cause.
	stopCauses := []error{
		xerrors.New("entitlements change"),
		nil,
	}
	for _, cause := range stopCauses {
		reconciler := prebuilds.NewStoreReconciler(
			db, pubSub, fileCache, prebuildsCfg, testLogger,
			quartz.NewMock(t),
			registry,
			newNoopEnqueuer(),
			newNoopUsageCheckerPtr(),
			noop.NewTracerProvider(),
			10,
			nil,
		)
		reconciler.Stop(ctx, cause)
	}
}
// TestFailedBuildBackoff verifies that failed prebuilds make CalculateActions
// hold off on creating new prebuilds until BackoffUntil has passed, and that
// the backoff duration scales with the number of failed builds.
func TestFailedBuildBackoff(t *testing.T) {
	t.Parallel()
	ctx := testutil.Context(t, testutil.WaitSuperLong)

	// Setup.
	mockClock := quartz.NewMock(t)
	backoffInterval := time.Minute
	cfg := codersdk.PrebuildsConfig{
		// Given: explicitly defined backoff configuration to validate timings.
		ReconciliationBackoffLookback: serpent.Duration(muchEarlier * -10), // Has to be positive.
		ReconciliationBackoffInterval: serpent.Duration(backoffInterval),
		ReconciliationInterval:        serpent.Duration(time.Second),
	}
	testLogger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
	db, pubSub := dbtestutil.NewDB(t)
	fileCache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
	reconciler := prebuilds.NewStoreReconciler(
		db, pubSub, fileCache, cfg, testLogger,
		mockClock,
		prometheus.NewRegistry(),
		newNoopEnqueuer(),
		newNoopUsageCheckerPtr(),
		noop.NewTracerProvider(),
		10,
		nil,
	)

	// Given: an active template version with presets and prebuilds configured.
	const desiredInstances = 2
	ownerID := uuid.New()
	dbgen.User(t, db, database.User{
		ID: ownerID,
	})
	org, template := setupTestDBTemplate(t, db, ownerID, false)
	templateVersionID := setupTestDBTemplateVersion(ctx, t, mockClock, db, pubSub, org.ID, ownerID, template.ID)
	preset := setupTestDBPreset(t, db, templateVersionID, desiredInstances, "test")
	// Every initially seeded prebuild has a failed job.
	for range desiredInstances {
		_, _ = setupTestDBPrebuild(t, mockClock, db, pubSub, database.WorkspaceTransitionStart, database.ProvisionerJobStatusFailed, org.ID, preset, template.ID, templateVersionID)
	}

	// When: determining what actions to take next, backoff is calculated because the prebuild is in a failed state.
	snapshot, err := reconciler.SnapshotState(ctx, db)
	require.NoError(t, err)
	require.Len(t, snapshot.Presets, 1)
	presetState, err := snapshot.FilterByPreset(preset.ID)
	require.NoError(t, err)
	state := presetState.CalculateState()
	actions, err := reconciler.CalculateActions(ctx, *presetState)
	require.NoError(t, err)
	require.Len(t, actions, 1)

	// reevaluate takes a fresh snapshot and recomputes presetState, state and
	// actions in place, requiring exactly one resulting action.
	reevaluate := func() {
		snapshot, err = reconciler.SnapshotState(ctx, db)
		require.NoError(t, err)
		presetState, err = snapshot.FilterByPreset(preset.ID)
		require.NoError(t, err)
		state = presetState.CalculateState()
		actions, err = reconciler.CalculateActions(ctx, *presetState)
		require.NoError(t, err)
		require.Len(t, actions, 1)
	}

	// Then: the backoff time is in the future, no prebuilds are running, and we won't create any new prebuilds.
	require.EqualValues(t, 0, state.Actual)
	require.EqualValues(t, 0, actions[0].Create)
	require.EqualValues(t, desiredInstances, state.Desired)
	require.True(t, mockClock.Now().Before(actions[0].BackoffUntil))

	// Then: the backoff time is as expected based on the number of failed builds.
	require.NotNil(t, presetState.Backoff)
	require.EqualValues(t, desiredInstances, presetState.Backoff.NumFailed)
	expectedBackoff := backoffInterval * time.Duration(presetState.Backoff.NumFailed)
	require.EqualValues(t, expectedBackoff, mockClock.Until(actions[0].BackoffUntil).Truncate(backoffInterval))

	// Remember the first backoff deadline; actions is overwritten by reevaluate below.
	initialBackoffUntil := actions[0].BackoffUntil

	// When: advancing to the next tick which is still within the backoff time.
	mockClock.Advance(cfg.ReconciliationInterval.Value())

	// Then: the backoff interval will not have changed.
	reevaluate()
	require.EqualValues(t, 0, state.Actual)
	require.EqualValues(t, 0, actions[0].Create)
	require.EqualValues(t, desiredInstances, state.Desired)
	require.EqualValues(t, initialBackoffUntil, actions[0].BackoffUntil)

	// When: advancing beyond the backoff time.
	mockClock.Advance(mockClock.Until(initialBackoffUntil.Add(time.Second)))

	// Then: we will attempt to create a new prebuild.
	reevaluate()
	require.EqualValues(t, 0, state.Actual)
	require.EqualValues(t, desiredInstances, state.Desired)
	require.EqualValues(t, desiredInstances, actions[0].Create)

	// When: the desired number of new prebuild are provisioned, but one fails again.
	for i := range desiredInstances {
		jobStatus := database.ProvisionerJobStatusFailed
		if i == 1 {
			jobStatus = database.ProvisionerJobStatusSucceeded
		}
		_, _ = setupTestDBPrebuild(t, mockClock, db, pubSub, database.WorkspaceTransitionStart, jobStatus, org.ID, preset, template.ID, templateVersionID)
	}

	// Then: one prebuild is now running, no new prebuilds are created, and the
	// backoff reflects three failed builds in total.
	reevaluate()
	require.EqualValues(t, 1, state.Actual)
	require.EqualValues(t, desiredInstances, state.Desired)
	require.EqualValues(t, 0, actions[0].Create)
	require.EqualValues(t, 3, presetState.Backoff.NumFailed)
	expectedBackoff = backoffInterval * time.Duration(presetState.Backoff.NumFailed)
	require.EqualValues(t, expectedBackoff, mockClock.Until(actions[0].BackoffUntil).Truncate(backoffInterval))
}
// TestReconciliationLock starts several reconcilers concurrently against the
// same database and verifies that the callbacks passed to
// WithReconciliationLock never overlap. An in-process mutex acts as the
// detector: if a callback finds it already locked, two callbacks are running
// at the same time.
func TestReconciliationLock(t *testing.T) {
	t.Parallel()

	ctx := testutil.Context(t, testutil.WaitSuperLong)
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
	db, ps := dbtestutil.NewDB(t)

	var (
		wg    sync.WaitGroup
		mutex sync.Mutex
	)
	for range 5 {
		wg.Add(1)
		go func() {
			defer wg.Done()

			cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
			reconciler := prebuilds.NewStoreReconciler(
				db,
				ps,
				cache,
				codersdk.PrebuildsConfig{},
				slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug),
				quartz.NewMock(t),
				prometheus.NewRegistry(),
				newNoopEnqueuer(),
				newNoopUsageCheckerPtr(), noop.NewTracerProvider(),
				10,
				nil,
			)
			reconciler.WithReconciliationLock(ctx, logger, func(_ context.Context, _ database.Store) error {
				// As long as the postgres lock is held, this mutex should always be unlocked when we get here.
				// If this mutex is ever locked at this point, then that means that the postgres lock is not being held while we're
				// inside WithReconciliationLock, which is meant to hold the lock.
				//
				// assert (not require) is used because this code runs on a goroutine
				// other than the test goroutine, where FailNow must not be called.
				if !assert.True(t, mutex.TryLock()) {
					// The mutex is not ours, so it must not be unlocked here.
					return nil
				}
				// Register the unlock right after acquiring so it cannot be skipped.
				defer mutex.Unlock()

				// Sleep a bit to give reconcilers more time to contend for the lock
				time.Sleep(time.Second)
				return nil
			})
		}()
	}
	wg.Wait()
}
// TestTrackResourceReplacement verifies that TrackResourceReplacement sends a
// single "workspace resource replaced" notification to the template admin and
// increments the resource replacements metric for the affected preset.
func TestTrackResourceReplacement(t *testing.T) {
	t.Parallel()

	ctx := testutil.Context(t, testutil.WaitSuperLong)

	// Setup.
	clock := quartz.NewMock(t)
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
	db, ps := dbtestutil.NewDB(t)
	fakeEnqueuer := newFakeEnqueuer()
	registry := prometheus.NewRegistry()
	cache := files.New(registry, &coderdtest.FakeAuthorizer{})
	reconciler := prebuilds.NewStoreReconciler(
		db, ps, cache, codersdk.PrebuildsConfig{}, logger,
		clock,
		registry,
		fakeEnqueuer,
		newNoopUsageCheckerPtr(),
		noop.NewTracerProvider(),
		10,
		nil,
	)

	// Given: a template admin to receive a notification.
	templateAdmin := dbgen.User(t, db, database.User{
		RBACRoles: []string{codersdk.RoleTemplateAdmin},
	})

	// Given: a prebuilt workspace.
	userID := uuid.New()
	dbgen.User(t, db, database.User{ID: userID})
	org, template := setupTestDBTemplate(t, db, userID, false)
	templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, ps, org.ID, userID, template.ID)
	preset := setupTestDBPreset(t, db, templateVersionID, 1, "b0rked")
	prebuiltWorkspace, prebuild := setupTestDBPrebuild(t, clock, db, ps, database.WorkspaceTransitionStart, database.ProvisionerJobStatusSucceeded, org.ID, preset, template.ID, templateVersionID)

	// Given: no replacement has been tracked yet, we should not see a metric for it yet.
	require.NoError(t, reconciler.ForceMetricsUpdate(ctx))
	mf, err := registry.Gather()
	require.NoError(t, err)
	require.Nil(t, findMetric(mf, prebuilds.MetricResourceReplacementsCount, map[string]string{
		"template_name": template.Name,
		"preset_name":   preset.Name,
		"org_name":      org.Name,
	}))

	// When: a claim occurred and resource replacements are detected (_how_ is out of scope of this test).
	reconciler.TrackResourceReplacement(ctx, prebuiltWorkspace.ID, prebuild.ID, []*sdkproto.ResourceReplacement{
		{
			Resource: "docker_container[0]",
			Paths:    []string{"env", "image"},
		},
		{
			Resource: "docker_volume[0]",
			Paths:    []string{"name"},
		},
	})

	// Then: a notification will be sent detailing the replacement(s).
	matching := fakeEnqueuer.Sent(func(notification *notificationstest.FakeNotification) bool {
		// This is not an exhaustive check of the expected labels/data in the notification. This would tie the implementations
		// too tightly together.
		// All we need to validate is that a template of the right kind was sent, to the expected user, with some replacements.
		//
		// Arguments follow testify's (expected, actual) order so that failure
		// output labels the two values correctly.
		if !assert.Equal(t, notifications.TemplateWorkspaceResourceReplaced, notification.TemplateID, "unexpected template") {
			return false
		}
		if !assert.Equal(t, templateAdmin.ID, notification.UserID, "unexpected receiver") {
			return false
		}
		if !assert.Len(t, notification.Data["replacements"], 2, "unexpected replacements count") {
			return false
		}
		return true
	})
	require.Len(t, matching, 1)

	// Then: the metric will be incremented.
	mf, err = registry.Gather()
	require.NoError(t, err)
	metric := findMetric(mf, prebuilds.MetricResourceReplacementsCount, map[string]string{
		"template_name": template.Name,
		"preset_name":   preset.Name,
		"org_name":      org.Name,
	})
	require.NotNil(t, metric)
	require.NotNil(t, metric.GetCounter())
	require.EqualValues(t, 1, metric.GetCounter().GetValue())
}
// TestExpiredPrebuildsMultipleActions seeds a preset with a mix of expired and
// non-expired running prebuilds and verifies that a single ReconcileAll call
// deletes the expired ones, deletes the oldest extraneous non-expired ones, and
// creates replacements as needed to reach the desired count.
func TestExpiredPrebuildsMultipleActions(t *testing.T) {
	t.Parallel()

	scenarios := []struct {
		name       string
		running    int
		desired    int32
		expired    int
		extraneous int
		created    int
	}{
		// With 2 running prebuilds, none of which are expired, and the desired count is met,
		// no deletions or creations should occur.
		{
			name:       "no expired prebuilds - no actions taken",
			running:    2,
			desired:    2,
			expired:    0,
			extraneous: 0,
			created:    0,
		},
		// With 2 running prebuilds, 1 of which is expired, the expired prebuild should be deleted,
		// and one new prebuild should be created to maintain the desired count.
		{
			name:       "one expired prebuild deleted and replaced",
			running:    2,
			desired:    2,
			expired:    1,
			extraneous: 0,
			created:    1,
		},
		// With 2 running prebuilds, both expired, both should be deleted,
		// and 2 new prebuilds created to match the desired count.
		{
			name:       "all prebuilds expired all deleted and recreated",
			running:    2,
			desired:    2,
			expired:    2,
			extraneous: 0,
			created:    2,
		},
		// With 4 running prebuilds, 2 of which are expired, and the desired count is 2,
		// the expired prebuilds should be deleted. No new creations are needed
		// since removing the expired ones brings actual = desired.
		{
			name:       "expired prebuilds deleted to reach desired count",
			running:    4,
			desired:    2,
			expired:    2,
			extraneous: 0,
			created:    0,
		},
		// With 4 running prebuilds (1 expired), and the desired count is 2,
		// the first action should delete the expired one,
		// and the second action should delete one additional (non-expired) prebuild
		// to eliminate the remaining excess.
		{
			name:       "expired prebuild deleted first, then extraneous",
			running:    4,
			desired:    2,
			expired:    1,
			extraneous: 1,
			created:    0,
		},
	}

	for _, tc := range scenarios {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			mockClock := quartz.NewMock(t)
			ctx := testutil.Context(t, testutil.WaitLong)
			prebuildsCfg := codersdk.PrebuildsConfig{}
			testLogger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
			db, pubSub := dbtestutil.NewDB(t)
			enqueuer := newFakeEnqueuer()
			registry := prometheus.NewRegistry()
			fileCache := files.New(registry, &coderdtest.FakeAuthorizer{})
			reconciler := prebuilds.NewStoreReconciler(
				db, pubSub, fileCache, prebuildsCfg, testLogger,
				mockClock,
				registry,
				enqueuer,
				newNoopUsageCheckerPtr(),
				noop.NewTracerProvider(),
				10,
				nil,
			)

			// Seed a user, a template with one version, and a preset carrying a TTL.
			ownerID := uuid.New()
			dbgen.User(t, db, database.User{
				ID: ownerID,
			})
			org, template := setupTestDBTemplate(t, db, ownerID, false)
			templateVersionID := setupTestDBTemplateVersion(ctx, t, mockClock, db, pubSub, org.ID, ownerID, template.ID)
			ttlDuration := muchEarlier - time.Hour
			ttlSeconds := int32(-ttlDuration.Seconds())
			preset := setupTestDBPreset(t, db, templateVersionID, tc.desired, "b0rked", withTTL(ttlSeconds))

			// The implementation uses time.Since(prebuild.CreatedAt) > ttl to check a prebuild expiration.
			// Since our mock clock defaults to a fixed time, we must align it with the current time
			// to ensure time-based logic works correctly in tests.
			mockClock.Set(time.Now())

			seeded := make(map[string]database.WorkspaceTable)
			freshWorkspaces := make([]database.WorkspaceTable, 0, tc.running-tc.expired)
			staleWorkspaces := make([]database.WorkspaceTable, 0, tc.expired)
			for idx := range tc.running {
				// The first tc.expired seeded workspaces are the expired ones.
				isExpired := idx < tc.expired

				// Space out creation offsets by 1 second to ensure deterministic ordering.
				// This lets the test verify that the correct (oldest) extraneous prebuilds are deleted.
				creationOffset := muchEarlier + time.Duration(idx)*time.Second
				if isExpired {
					// Push the creation time far enough into the past so that
					// time.Since(createdAt) > TTL and the prebuild counts as expired.
					creationOffset = ttlDuration - time.Minute
				}

				resp := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
					OwnerID:        database.PrebuildsSystemUserID,
					OrganizationID: org.ID,
					TemplateID:     template.ID,
					CreatedAt:      mockClock.Now().Add(creationOffset),
				}).Pubsub(pubSub).Seed(database.WorkspaceBuild{
					InitiatorID:             database.PrebuildsSystemUserID,
					TemplateVersionID:       templateVersionID,
					TemplateVersionPresetID: uuid.NullUUID{UUID: preset.ID, Valid: true},
					Transition:              database.WorkspaceTransitionStart,
				}).Params(database.WorkspaceBuildParameter{
					Name:  "test",
					Value: "test",
				}).Do()

				if isExpired {
					staleWorkspaces = append(staleWorkspaces, resp.Workspace)
				} else {
					freshWorkspaces = append(freshWorkspaces, resp.Workspace)
				}
				seeded[resp.Workspace.ID.String()] = resp.Workspace
			}

			// requireStarted asserts that the workspace has exactly one build and that it is a 'start' transition.
			requireStarted := func(ws database.WorkspaceTable) {
				builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
					WorkspaceID: ws.ID,
				})
				require.NoError(t, err)
				require.Len(t, builds, 1)
				require.Equal(t, database.WorkspaceTransitionStart, builds[0].Transition)
			}

			// requireDeleted asserts that the workspace has exactly two builds:
			// a 'delete' transition listed first and the original 'start' listed second.
			requireDeleted := func(ws database.WorkspaceTable) {
				builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
					WorkspaceID: ws.ID,
				})
				require.NoError(t, err)
				require.Len(t, builds, 2)
				require.Equal(t, database.WorkspaceTransitionDelete, builds[0].Transition)
				require.Equal(t, database.WorkspaceTransitionStart, builds[1].Transition)
			}

			// Before reconciling: every seeded workspace exists, expired or not.
			workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			require.Len(t, workspaces, tc.running)

			// Tally provisioner job statuses over all builds; only succeeded jobs are expected.
			statusCounts := make(map[database.ProvisionerJobStatus]int)
			for _, ws := range workspaces {
				builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
					WorkspaceID: ws.ID,
				})
				require.NoError(t, err)
				for _, build := range builds {
					job, err := db.GetProvisionerJobByID(ctx, build.JobID)
					require.NoError(t, err)
					statusCounts[job.JobStatus]++
				}
			}
			require.Len(t, statusCounts, 1)
			require.Equal(t, tc.running, statusCounts[database.ProvisionerJobStatusSucceeded])

			// Every seeded workspace (expired and non-expired) has only its 'start' build.
			for _, ws := range seeded {
				requireStarted(ws)
			}

			// Trigger reconciliation to process expired prebuilds and enforce desired state.
			_, err = reconciler.ReconcileAll(ctx)
			require.NoError(t, err)

			// Order the non-expired workspaces oldest first.
			sort.Slice(freshWorkspaces, func(a, b int) bool {
				return freshWorkspaces[a].CreatedAt.Before(freshWorkspaces[b].CreatedAt)
			})

			// Among the non-expired workspaces:
			// - the oldest `tc.extraneous` must have been deleted,
			// - the remaining newer ones must still only have their 'start' build.
			deletedExtraneous := 0
			for pos, ws := range freshWorkspaces {
				if pos >= tc.extraneous {
					requireStarted(ws)
					continue
				}
				requireDeleted(ws)
				deletedExtraneous++
			}
			require.Equal(t, tc.extraneous, deletedExtraneous)

			// Each expired workspace must have a 'delete' transition recorded.
			for _, ws := range staleWorkspaces {
				requireDeleted(ws)
			}

			// Any workspace that was not part of the seeded set was created by the
			// reconciliation; each must be started and their number must match.
			allWorkspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			createdCount := 0
			for _, ws := range allWorkspaces {
				if _, wasSeeded := seeded[ws.ID.String()]; wasSeeded {
					continue
				}
				requireStarted(ws)
				createdCount++
			}
			require.Equal(t, tc.created, createdCount)
		})
	}
}
// TestCancelPendingPrebuilds exercises how ReconcileAll treats prebuild
// provisioner jobs once a newer template version has been made active.
//
// Subtests:
//   - "CancelPendingPrebuilds" is table-driven. Each case seeds one workspace
//     build through setupBuild, promotes a second template version to active,
//     runs a single reconciliation pass, and then checks whether the seeded job
//     was canceled and its workspace deleted (shouldCancel) or left untouched.
//   - "CancelPendingPrebuildsMultipleTemplates" seeds running and pending
//     prebuilds across several versions of two templates and checks, per group,
//     whether the jobs were canceled and the workspaces deleted.
func TestCancelPendingPrebuilds(t *testing.T) {
t.Parallel()
t.Run("CancelPendingPrebuilds", func(t *testing.T) {
t.Parallel()
for _, tt := range []struct {
name string
// setupBuild seeds the workspace, workspace build and provisioner job under
// test and returns them. It is handed the template, template version and
// preset that the build should be attached to.
setupBuild func(
t *testing.T,
db database.Store,
client *codersdk.Client,
orgID uuid.UUID,
templateID uuid.UUID,
templateVersionID uuid.UUID,
presetID uuid.NullUUID,
) dbfake.WorkspaceResponse
// activeTemplateVersion selects which version is passed to setupBuild: the
// one promoted to active later in the test (true) or the older one (false).
activeTemplateVersion bool
// previouslyCanceled / previouslyCompleted mark cases whose seeded job is
// already canceled / completed; the matching "still untouched" assertions
// are skipped for them.
previouslyCanceled bool
previouslyCompleted bool
// shouldCancel is the expected outcome of the reconciliation pass.
shouldCancel bool
}{
// Should cancel pending prebuild-related jobs from a non-active template version
{
name: "CancelsPendingPrebuildJobNonActiveVersion",
// Given: a pending prebuild job
setupBuild: func(t *testing.T,
db database.Store,
client *codersdk.Client,
orgID uuid.UUID,
templateID uuid.UUID,
templateVersionID uuid.UUID,
presetID uuid.NullUUID,
) dbfake.WorkspaceResponse {
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: database.PrebuildsSystemUserID,
OrganizationID: orgID,
TemplateID: templateID,
}).Pending().Seed(database.WorkspaceBuild{
InitiatorID: database.PrebuildsSystemUserID,
TemplateVersionID: templateVersionID,
TemplateVersionPresetID: presetID,
}).Do()
},
activeTemplateVersion: false,
previouslyCanceled: false,
previouslyCompleted: false,
shouldCancel: true,
},
// Should not cancel pending prebuild-related jobs from an active template version
{
name: "DoesNotCancelPendingPrebuildJobActiveVersion",
// Given: a pending prebuild job
setupBuild: func(t *testing.T,
db database.Store,
client *codersdk.Client,
orgID uuid.UUID,
templateID uuid.UUID,
templateVersionID uuid.UUID,
presetID uuid.NullUUID,
) dbfake.WorkspaceResponse {
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: database.PrebuildsSystemUserID,
OrganizationID: orgID,
TemplateID: templateID,
}).Pending().Seed(database.WorkspaceBuild{
InitiatorID: database.PrebuildsSystemUserID,
TemplateVersionID: templateVersionID,
TemplateVersionPresetID: presetID,
}).Do()
},
activeTemplateVersion: true,
previouslyCanceled: false,
previouslyCompleted: false,
shouldCancel: false,
},
// Should not cancel pending prebuild-related jobs associated to a second workspace build
{
name: "DoesNotCancelPendingPrebuildJobSecondBuild",
// Given: a pending prebuild job associated to a second workspace build
setupBuild: func(t *testing.T,
db database.Store,
client *codersdk.Client,
orgID uuid.UUID,
templateID uuid.UUID,
templateVersionID uuid.UUID,
presetID uuid.NullUUID,
) dbfake.WorkspaceResponse {
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: database.PrebuildsSystemUserID,
OrganizationID: orgID,
TemplateID: templateID,
}).Pending().Seed(database.WorkspaceBuild{
InitiatorID: database.PrebuildsSystemUserID,
BuildNumber: int32(2),
TemplateVersionID: templateVersionID,
TemplateVersionPresetID: presetID,
}).Do()
},
activeTemplateVersion: false,
previouslyCanceled: false,
previouslyCompleted: false,
shouldCancel: false,
},
// Should not cancel pending prebuild-related jobs of a different template
{
name: "DoesNotCancelPrebuildJobDifferentTemplate",
// Given: a pending prebuild job belonging to a different template
setupBuild: func(
t *testing.T,
db database.Store,
client *codersdk.Client,
orgID uuid.UUID,
templateID uuid.UUID,
templateVersionID uuid.UUID,
presetID uuid.NullUUID,
) dbfake.WorkspaceResponse {
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: database.PrebuildsSystemUserID,
OrganizationID: orgID,
// The templateID argument is deliberately ignored here.
// NOTE(review): presumably dbfake creates a fresh template when given uuid.Nil — confirm.
TemplateID: uuid.Nil,
}).Pending().Seed(database.WorkspaceBuild{
InitiatorID: database.PrebuildsSystemUserID,
TemplateVersionID: templateVersionID,
TemplateVersionPresetID: presetID,
}).Do()
},
activeTemplateVersion: false,
previouslyCanceled: false,
previouslyCompleted: false,
shouldCancel: false,
},
// Should not cancel pending user workspace build jobs
{
name: "DoesNotCancelUserWorkspaceJob",
// Given: a pending user workspace build job
setupBuild: func(
t *testing.T,
db database.Store,
client *codersdk.Client,
orgID uuid.UUID,
templateID uuid.UUID,
templateVersionID uuid.UUID,
presetID uuid.NullUUID,
) dbfake.WorkspaceResponse {
_, member := coderdtest.CreateAnotherUser(t, client, orgID, rbac.RoleMember())
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: member.ID,
OrganizationID: orgID,
// NOTE(review): this case also passes uuid.Nil instead of templateID, so it
// differs from the cancel case in both owner and template — confirm intended.
TemplateID: uuid.Nil,
}).Pending().Seed(database.WorkspaceBuild{
InitiatorID: member.ID,
TemplateVersionID: templateVersionID,
TemplateVersionPresetID: presetID,
}).Do()
},
activeTemplateVersion: false,
previouslyCanceled: false,
previouslyCompleted: false,
shouldCancel: false,
},
// Should not cancel pending prebuild-related jobs with a delete transition
{
name: "DoesNotCancelPrebuildJobDeleteTransition",
// Given: a pending prebuild job with a delete transition
setupBuild: func(
t *testing.T,
db database.Store,
client *codersdk.Client,
orgID uuid.UUID,
templateID uuid.UUID,
templateVersionID uuid.UUID,
presetID uuid.NullUUID,
) dbfake.WorkspaceResponse {
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: database.PrebuildsSystemUserID,
OrganizationID: orgID,
TemplateID: templateID,
}).Pending().Seed(database.WorkspaceBuild{
InitiatorID: database.PrebuildsSystemUserID,
Transition: database.WorkspaceTransitionDelete,
TemplateVersionID: templateVersionID,
TemplateVersionPresetID: presetID,
}).Do()
},
activeTemplateVersion: false,
previouslyCanceled: false,
previouslyCompleted: false,
shouldCancel: false,
},
// Should not cancel prebuild-related jobs already being processed by a provisioner
{
name: "DoesNotCancelRunningPrebuildJob",
// Given: a running prebuild job
setupBuild: func(
t *testing.T,
db database.Store,
client *codersdk.Client,
orgID uuid.UUID,
templateID uuid.UUID,
templateVersionID uuid.UUID,
presetID uuid.NullUUID,
) dbfake.WorkspaceResponse {
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: database.PrebuildsSystemUserID,
OrganizationID: orgID,
TemplateID: templateID,
}).Starting().Seed(database.WorkspaceBuild{
InitiatorID: database.PrebuildsSystemUserID,
TemplateVersionID: templateVersionID,
TemplateVersionPresetID: presetID,
}).Do()
},
activeTemplateVersion: false,
previouslyCanceled: false,
previouslyCompleted: false,
shouldCancel: false,
},
// Should not cancel already canceled prebuild-related jobs
{
name: "DoesNotCancelCanceledPrebuildJob",
// Given: a canceled prebuild job
setupBuild: func(
t *testing.T,
db database.Store,
client *codersdk.Client,
orgID uuid.UUID,
templateID uuid.UUID,
templateVersionID uuid.UUID,
presetID uuid.NullUUID,
) dbfake.WorkspaceResponse {
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: database.PrebuildsSystemUserID,
OrganizationID: orgID,
TemplateID: templateID,
}).Canceled().Seed(database.WorkspaceBuild{
InitiatorID: database.PrebuildsSystemUserID,
TemplateVersionID: templateVersionID,
TemplateVersionPresetID: presetID,
}).Do()
},
activeTemplateVersion: false,
shouldCancel: false,
previouslyCanceled: true,
previouslyCompleted: true,
},
// Should not cancel completed prebuild-related jobs
{
name: "DoesNotCancelCompletedPrebuildJob",
// Given: a completed prebuild job
setupBuild: func(
t *testing.T,
db database.Store,
client *codersdk.Client,
orgID uuid.UUID,
templateID uuid.UUID,
templateVersionID uuid.UUID,
presetID uuid.NullUUID,
) dbfake.WorkspaceResponse {
// No Pending()/Starting()/Canceled() modifier is applied in this case.
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: database.PrebuildsSystemUserID,
OrganizationID: orgID,
TemplateID: templateID,
}).Seed(database.WorkspaceBuild{
InitiatorID: database.PrebuildsSystemUserID,
TemplateVersionID: templateVersionID,
TemplateVersionPresetID: presetID,
}).Do()
},
activeTemplateVersion: false,
shouldCancel: false,
previouslyCanceled: false,
previouslyCompleted: true,
},
} {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
// Set the clock to Monday, January 1st, 2024 at 8:00 AM UTC to keep the test deterministic
clock := quartz.NewMock(t)
clock.Set(time.Date(2024, 1, 1, 8, 0, 0, 0, time.UTC))
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
// Setup
db, ps := dbtestutil.NewDB(t)
client, _, _ := coderdtest.NewWithAPI(t, &coderdtest.Options{
// Explicitly not including provisioner daemons, as we don't want the jobs to be processed
// Jobs operations will be simulated via the database model
IncludeProvisionerDaemon: false,
Database: db,
Pubsub: ps,
Clock: clock,
})
fakeEnqueuer := newFakeEnqueuer()
registry := prometheus.NewRegistry()
cache := files.New(registry, &coderdtest.FakeAuthorizer{})
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
reconciler := prebuilds.NewStoreReconciler(
db, ps, cache, codersdk.PrebuildsConfig{}, logger,
clock,
registry,
fakeEnqueuer,
newNoopUsageCheckerPtr(),
noop.NewTracerProvider(),
10,
nil,
)
owner := coderdtest.CreateFirstUser(t, client)
// Given: a template with a version containing a preset with 1 prebuild instance
nonActivePresetID := uuid.NullUUID{
UUID: uuid.New(),
Valid: true,
}
nonActiveTemplateVersion := dbfake.TemplateVersion(t, db).Seed(database.TemplateVersion{
OrganizationID: owner.OrganizationID,
CreatedBy: owner.UserID,
}).Preset(database.TemplateVersionPreset{
ID: nonActivePresetID.UUID,
DesiredInstances: sql.NullInt32{
Int32: 1,
Valid: true,
},
}).Do()
templateID := nonActiveTemplateVersion.Template.ID
// Given: a new active template version
activePresetID := uuid.NullUUID{
UUID: uuid.New(),
Valid: true,
}
// The second version is attached to the template created above, hence SkipCreateTemplate.
activeTemplateVersion := dbfake.TemplateVersion(t, db).Seed(database.TemplateVersion{
OrganizationID: owner.OrganizationID,
CreatedBy: owner.UserID,
TemplateID: uuid.NullUUID{
UUID: templateID,
Valid: true,
},
}).Preset(database.TemplateVersionPreset{
ID: activePresetID.UUID,
DesiredInstances: sql.NullInt32{
Int32: 1,
Valid: true,
},
}).SkipCreateTemplate().Do()
var pendingWorkspace dbfake.WorkspaceResponse
if tt.activeTemplateVersion {
// Given: a prebuilt workspace, workspace build and respective provisioner job from an
// active template version
pendingWorkspace = tt.setupBuild(t, db, client,
owner.OrganizationID, templateID, activeTemplateVersion.TemplateVersion.ID, activePresetID)
} else {
// Given: a prebuilt workspace, workspace build and respective provisioner job from a
// non-active template version
pendingWorkspace = tt.setupBuild(t, db, client,
owner.OrganizationID, templateID, nonActiveTemplateVersion.TemplateVersion.ID, nonActivePresetID)
}
// Given: the new template version is promoted to active
err := db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
ID: templateID,
ActiveVersionID: activeTemplateVersion.TemplateVersion.ID,
})
require.NoError(t, err)
// When: the reconciliation loop is triggered
_, err = reconciler.ReconcileAll(ctx)
require.NoError(t, err)
if tt.shouldCancel {
// Then: the pending prebuild job from non-active version should be canceled
cancelledJob, err := db.GetProvisionerJobByID(ctx, pendingWorkspace.Build.JobID)
require.NoError(t, err)
// The mock clock is set once and never advanced in this subtest, so clock.Now()
// is still the instant the reconciler observed.
require.Equal(t, clock.Now().UTC(), cancelledJob.CanceledAt.Time.UTC())
require.Equal(t, clock.Now().UTC(), cancelledJob.CompletedAt.Time.UTC())
require.Equal(t, database.ProvisionerJobStatusCanceled, cancelledJob.JobStatus)
// Then: the workspace should be deleted
deletedWorkspace, err := db.GetWorkspaceByID(ctx, pendingWorkspace.Workspace.ID)
require.NoError(t, err)
require.True(t, deletedWorkspace.Deleted)
latestBuild, err := db.GetLatestWorkspaceBuildByWorkspaceID(ctx, deletedWorkspace.ID)
require.NoError(t, err)
require.Equal(t, database.WorkspaceTransitionDelete, latestBuild.Transition)
deleteJob, err := db.GetProvisionerJobByID(ctx, latestBuild.JobID)
require.NoError(t, err)
// The delete job is expected to be completed and succeeded without any worker
// assigned (no provisioner daemon is started in this test).
require.True(t, deleteJob.CompletedAt.Valid)
require.False(t, deleteJob.WorkerID.Valid)
require.Equal(t, database.ProvisionerJobStatusSucceeded, deleteJob.JobStatus)
} else {
// Then: the pending prebuild job should not be canceled
job, err := db.GetProvisionerJobByID(ctx, pendingWorkspace.Build.JobID)
require.NoError(t, err)
if !tt.previouslyCanceled {
require.Zero(t, job.CanceledAt.Time.UTC())
require.NotEqual(t, database.ProvisionerJobStatusCanceled, job.JobStatus)
}
if !tt.previouslyCompleted {
require.Zero(t, job.CompletedAt.Time.UTC())
}
// Then: the workspace should not be deleted
workspace, err := db.GetWorkspaceByID(ctx, pendingWorkspace.Workspace.ID)
require.NoError(t, err)
require.False(t, workspace.Deleted)
}
})
}
})
t.Run("CancelPendingPrebuildsMultipleTemplates", func(t *testing.T) {
t.Parallel()
// createTemplateVersionWithPreset seeds a template version carrying one preset with
// prebuiltInstances desired instances and returns (template ID, version ID, preset ID).
// Callers pass uuid.Nil as templateID for the first version of a template.
// NOTE(review): which version ends up active is decided inside dbfake and is not
// visible here — confirm against dbfake.TemplateVersion.
createTemplateVersionWithPreset := func(
t *testing.T,
db database.Store,
orgID uuid.UUID,
userID uuid.UUID,
templateID uuid.UUID,
prebuiltInstances int32,
) (uuid.UUID, uuid.UUID, uuid.UUID) {
templatePreset := uuid.NullUUID{
UUID: uuid.New(),
Valid: true,
}
templateVersion := dbfake.TemplateVersion(t, db).Seed(database.TemplateVersion{
OrganizationID: orgID,
CreatedBy: userID,
TemplateID: uuid.NullUUID{
UUID: templateID,
Valid: true,
},
}).Preset(database.TemplateVersionPreset{
ID: templatePreset.UUID,
DesiredInstances: sql.NullInt32{
Int32: prebuiltInstances,
Valid: true,
},
}).Do()
return templateVersion.Template.ID, templateVersion.TemplateVersion.ID, templatePreset.UUID
}
// setupPrebuilds seeds count workspaces owned by the prebuilds system user for the
// given template/version/preset. When pending is true the builder's Pending()
// modifier is applied; otherwise the builder defaults are used.
setupPrebuilds := func(
t *testing.T,
db database.Store,
orgID uuid.UUID,
templateID uuid.UUID,
versionID uuid.UUID,
presetID uuid.UUID,
count int,
pending bool,
) []dbfake.WorkspaceResponse {
prebuilds := make([]dbfake.WorkspaceResponse, count)
for i := range count {
builder := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: database.PrebuildsSystemUserID,
OrganizationID: orgID,
TemplateID: templateID,
})
if pending {
builder = builder.Pending()
}
prebuilds[i] = builder.Seed(database.WorkspaceBuild{
InitiatorID: database.PrebuildsSystemUserID,
TemplateVersionID: versionID,
TemplateVersionPresetID: uuid.NullUUID{
UUID: presetID,
Valid: true,
},
}).Do()
}
return prebuilds
}
// checkIfJobCanceledAndDeleted asserts, for every seeded prebuild, either that its job
// was canceled at clock.Now() and its workspace deleted through a succeeded delete
// build (shouldBeCanceledAndDeleted == true), or that neither happened.
checkIfJobCanceledAndDeleted := func(
t *testing.T,
clock *quartz.Mock,
ctx context.Context,
db database.Store,
shouldBeCanceledAndDeleted bool,
prebuilds []dbfake.WorkspaceResponse,
) {
for _, prebuild := range prebuilds {
pendingJob, err := db.GetProvisionerJobByID(ctx, prebuild.Build.JobID)
require.NoError(t, err)
if shouldBeCanceledAndDeleted {
// Pending job should be canceled
require.Equal(t, database.ProvisionerJobStatusCanceled, pendingJob.JobStatus)
require.Equal(t, clock.Now().UTC(), pendingJob.CanceledAt.Time.UTC())
require.Equal(t, clock.Now().UTC(), pendingJob.CompletedAt.Time.UTC())
// Workspace should be deleted
deletedWorkspace, err := db.GetWorkspaceByID(ctx, prebuild.Workspace.ID)
require.NoError(t, err)
require.True(t, deletedWorkspace.Deleted)
latestBuild, err := db.GetLatestWorkspaceBuildByWorkspaceID(ctx, deletedWorkspace.ID)
require.NoError(t, err)
require.Equal(t, database.WorkspaceTransitionDelete, latestBuild.Transition)
deleteJob, err := db.GetProvisionerJobByID(ctx, latestBuild.JobID)
require.NoError(t, err)
require.True(t, deleteJob.CompletedAt.Valid)
require.False(t, deleteJob.WorkerID.Valid)
require.Equal(t, database.ProvisionerJobStatusSucceeded, deleteJob.JobStatus)
} else {
// Pending job should not be canceled
require.NotEqual(t, database.ProvisionerJobStatusCanceled, pendingJob.JobStatus)
require.Zero(t, pendingJob.CanceledAt.Time.UTC())
// Workspace should not be deleted
workspace, err := db.GetWorkspaceByID(ctx, prebuild.Workspace.ID)
require.NoError(t, err)
require.False(t, workspace.Deleted)
}
}
}
// Set the clock to Monday, January 1st, 2024 at 8:00 AM UTC to keep the test deterministic
clock := quartz.NewMock(t)
clock.Set(time.Date(2024, 1, 1, 8, 0, 0, 0, time.UTC))
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
// Setup
db, ps := dbtestutil.NewDB(t)
client, _, _ := coderdtest.NewWithAPI(t, &coderdtest.Options{
// Explicitly not including provisioner daemons, as we don't want the jobs to be processed
// Jobs operations will be simulated via the database model
IncludeProvisionerDaemon: false,
Database: db,
Pubsub: ps,
Clock: clock,
})
fakeEnqueuer := newFakeEnqueuer()
registry := prometheus.NewRegistry()
cache := files.New(registry, &coderdtest.FakeAuthorizer{})
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
reconciler := prebuilds.NewStoreReconciler(
db, ps, cache, codersdk.PrebuildsConfig{}, logger,
clock,
registry,
fakeEnqueuer,
newNoopUsageCheckerPtr(),
noop.NewTracerProvider(),
10,
nil,
)
owner := coderdtest.CreateFirstUser(t, client)
// Given: template A with 2 versions
// Given: template A version v1: with a preset with 5 instances (2 running, 3 pending)
templateAID, templateAVersion1ID, templateAVersion1PresetID := createTemplateVersionWithPreset(t, db, owner.OrganizationID, owner.UserID, uuid.Nil, 5)
templateAVersion1Running := setupPrebuilds(t, db, owner.OrganizationID, templateAID, templateAVersion1ID, templateAVersion1PresetID, 2, false)
templateAVersion1Pending := setupPrebuilds(t, db, owner.OrganizationID, templateAID, templateAVersion1ID, templateAVersion1PresetID, 3, true)
// Given: template A version v2 (active version): with a preset with 2 instances (1 running, 1 pending)
_, templateAVersion2ID, templateAVersion2PresetID := createTemplateVersionWithPreset(t, db, owner.OrganizationID, owner.UserID, templateAID, 2)
templateAVersion2Running := setupPrebuilds(t, db, owner.OrganizationID, templateAID, templateAVersion2ID, templateAVersion2PresetID, 1, false)
templateAVersion2Pending := setupPrebuilds(t, db, owner.OrganizationID, templateAID, templateAVersion2ID, templateAVersion2PresetID, 1, true)
// Given: template B with 3 versions
// Given: template B version v1: with a preset with 3 instances (1 running, 2 pending)
templateBID, templateBVersion1ID, templateBVersion1PresetID := createTemplateVersionWithPreset(t, db, owner.OrganizationID, owner.UserID, uuid.Nil, 3)
templateBVersion1Running := setupPrebuilds(t, db, owner.OrganizationID, templateBID, templateBVersion1ID, templateBVersion1PresetID, 1, false)
templateBVersion1Pending := setupPrebuilds(t, db, owner.OrganizationID, templateBID, templateBVersion1ID, templateBVersion1PresetID, 2, true)
// Given: template B version v2: with a preset with 2 instances (2 pending)
_, templateBVersion2ID, templateBVersion2PresetID := createTemplateVersionWithPreset(t, db, owner.OrganizationID, owner.UserID, templateBID, 2)
templateBVersion2Pending := setupPrebuilds(t, db, owner.OrganizationID, templateBID, templateBVersion2ID, templateBVersion2PresetID, 2, true)
// Given: template B version v3 (active version): with a preset with 2 instances (1 running, 1 pending)
_, templateBVersion3ID, templateBVersion3PresetID := createTemplateVersionWithPreset(t, db, owner.OrganizationID, owner.UserID, templateBID, 2)
templateBVersion3Running := setupPrebuilds(t, db, owner.OrganizationID, templateBID, templateBVersion3ID, templateBVersion3PresetID, 1, false)
templateBVersion3Pending := setupPrebuilds(t, db, owner.OrganizationID, templateBID, templateBVersion3ID, templateBVersion3PresetID, 1, true)
// When: the reconciliation loop is executed
_, err := reconciler.ReconcileAll(ctx)
require.NoError(t, err)
// Then: template A version 1 running workspaces should not be canceled
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateAVersion1Running)
// Then: template A version 1 pending workspaces should be canceled
checkIfJobCanceledAndDeleted(t, clock, ctx, db, true, templateAVersion1Pending)
// Then: template A version 2 running and pending workspaces should not be canceled
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateAVersion2Running)
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateAVersion2Pending)
// Then: template B version 1 running workspaces should not be canceled
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateBVersion1Running)
// Then: template B version 1 pending workspaces should be canceled
checkIfJobCanceledAndDeleted(t, clock, ctx, db, true, templateBVersion1Pending)
// Then: template B version 2 pending workspaces should be canceled
checkIfJobCanceledAndDeleted(t, clock, ctx, db, true, templateBVersion2Pending)
// Then: template B version 3 running and pending workspaces should not be canceled
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateBVersion3Running)
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateBVersion3Pending)
})
}
// TestReconciliationStats checks the timing information returned by
// ReconcileAll: the reported Elapsed must be positive, within 100ms of the
// wall-clock duration measured around the call, and below five seconds.
func TestReconciliationStats(t *testing.T) {
	t.Parallel()

	// A real clock is used here because the assertions compare against
	// wall-clock measurements taken with time.Now/time.Since.
	realClock := quartz.NewReal()
	store, pubsubConn := dbtestutil.NewDB(t)
	apiOptions := &coderdtest.Options{
		Database: store,
		Pubsub:   pubsubConn,
		Clock:    realClock,
	}
	client, _, _ := coderdtest.NewWithAPI(t, apiOptions)

	enqueuer := newFakeEnqueuer()
	reg := prometheus.NewRegistry()
	fileCache := files.New(reg, &coderdtest.FakeAuthorizer{})
	testLogger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
	reconciler := prebuilds.NewStoreReconciler(
		store, pubsubConn, fileCache, codersdk.PrebuildsConfig{}, testLogger,
		realClock,
		reg,
		enqueuer,
		newNoopUsageCheckerPtr(),
		noop.NewTracerProvider(),
		10,
		nil,
	)
	firstUser := coderdtest.CreateFirstUser(t, client)

	ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
	defer cancel()

	// Seed a single template version whose preset asks for one prebuild instance.
	versionSeed := database.TemplateVersion{
		OrganizationID: firstUser.OrganizationID,
		CreatedBy:      firstUser.UserID,
	}
	presetSeed := database.TemplateVersionPreset{
		DesiredInstances: sql.NullInt32{Int32: 1, Valid: true},
	}
	dbfake.TemplateVersion(t, store).Seed(versionSeed).Preset(presetSeed).Do()

	// Measure the call from the outside so it can be compared with the
	// duration the reconciler reports.
	began := time.Now()
	stats, err := reconciler.ReconcileAll(ctx)
	wallElapsed := time.Since(began)
	require.NoError(t, err)

	reported := stats.Elapsed
	require.Greater(t, reported, time.Duration(0))
	// The reported duration must agree with the outside measurement (±100ms).
	require.InDelta(t, wallElapsed.Milliseconds(), reported.Milliseconds(), 100)
	// Guard against the reconciliation loop being unexpectedly slow.
	require.Less(t, reported, 5*time.Second)
}
// newNoopEnqueuer returns a new enqueuer built by notifications.NewNoopEnqueuer.
func newNoopEnqueuer() *notifications.NoopEnqueuer {
	enqueuer := notifications.NewNoopEnqueuer()
	return enqueuer
}
// newFakeEnqueuer returns a new enqueuer built by notificationstest.NewFakeEnqueuer.
func newFakeEnqueuer() *notificationstest.FakeEnqueuer {
	enqueuer := notificationstest.NewFakeEnqueuer()
	return enqueuer
}
// newNoopUsageCheckerPtr returns an atomic pointer that already holds a
// wsbuilder.NoopUsageChecker wrapped in the wsbuilder.UsageChecker interface.
func newNoopUsageCheckerPtr() *atomic.Pointer[wsbuilder.UsageChecker] {
	// The pointer stores *UsageChecker, so the concrete no-op value is first
	// converted to the interface type and then addressed.
	checker := wsbuilder.UsageChecker(wsbuilder.NoopUsageChecker{})
	holder := new(atomic.Pointer[wsbuilder.UsageChecker])
	holder.Store(&checker)
	return holder
}
// setupTestDBTemplate inserts a new organization and a template inside it,
// created by userID and backdated by muchEarlier relative to the wall clock.
// When templateDeleted is true the template is additionally marked as deleted.
// It returns the organization and the template as they were inserted.
//
// nolint:revive // It's a control flag, but this is a test.
func setupTestDBTemplate(
	t *testing.T,
	db database.Store,
	userID uuid.UUID,
	templateDeleted bool,
) (
	database.Organization,
	database.Template,
) {
	t.Helper()

	organization := dbgen.Organization(t, db, database.Organization{})
	seed := database.Template{
		CreatedBy:      userID,
		OrganizationID: organization.ID,
		CreatedAt:      time.Now().Add(muchEarlier),
	}
	tpl := dbgen.Template(t, db, seed)
	if !templateDeleted {
		return organization, tpl
	}

	// Flip the deleted flag on the freshly inserted row.
	ctx := testutil.Context(t, testutil.WaitShort)
	err := db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
		ID:      tpl.ID,
		Deleted: true,
	})
	require.NoError(t, err)
	return organization, tpl
}
// setupTestDBTemplateWithinOrg inserts a template named templateName into the
// given organization, created by userID and backdated by muchEarlier relative
// to the wall clock. When templateDeleted is true the template is additionally
// marked as deleted. It returns the template as it was inserted.
//
// nolint:revive // It's a control flag, but this is a test.
func setupTestDBTemplateWithinOrg(
	t *testing.T,
	db database.Store,
	userID uuid.UUID,
	templateDeleted bool,
	templateName string,
	org database.Organization,
) database.Template {
	t.Helper()

	seed := database.Template{
		Name:           templateName,
		CreatedBy:      userID,
		OrganizationID: org.ID,
		CreatedAt:      time.Now().Add(muchEarlier),
	}
	tpl := dbgen.Template(t, db, seed)
	if !templateDeleted {
		return tpl
	}

	// Flip the deleted flag on the freshly inserted row.
	ctx := testutil.Context(t, testutil.WaitShort)
	err := db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
		ID:      tpl.ID,
		Deleted: true,
	})
	require.NoError(t, err)
	return tpl
}
// Negative offsets that the test setup helpers in this file add to a "now"
// timestamp in order to backdate the rows they insert.
const (
// earlier is one hour in the past.
earlier = -time.Hour
// muchEarlier is two hours in the past.
muchEarlier = -time.Hour * 2
)
// setupTestDBTemplateVersion inserts a provisioner job with a completion time
// and a template version that references it, promotes that version to be the
// active version of templateID, and attaches a required, immutable string
// parameter named "test" to it. It returns the new template version's ID.
//
// NOTE(review): the job timestamps are derived from the injected clock while
// the version's CreatedAt uses time.Now(); confirm the mix is intentional.
func setupTestDBTemplateVersion(
	ctx context.Context,
	t *testing.T,
	clock quartz.Clock,
	db database.Store,
	ps pubsub.Pubsub,
	orgID uuid.UUID,
	userID uuid.UUID,
	templateID uuid.UUID,
) uuid.UUID {
	t.Helper()

	job := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
		OrganizationID: orgID,
		InitiatorID:    userID,
		CreatedAt:      clock.Now().Add(muchEarlier),
		CompletedAt:    sql.NullTime{Time: clock.Now().Add(earlier), Valid: true},
	})

	version := dbgen.TemplateVersion(t, db, database.TemplateVersion{
		JobID:          job.ID,
		TemplateID:     uuid.NullUUID{UUID: templateID, Valid: true},
		OrganizationID: orgID,
		CreatedBy:      userID,
		CreatedAt:      time.Now().Add(muchEarlier),
	})

	// Promote the version that was just inserted.
	promotion := database.UpdateTemplateActiveVersionByIDParams{
		ID:              templateID,
		ActiveVersionID: version.ID,
	}
	err := db.UpdateTemplateActiveVersionByID(ctx, promotion)
	require.NoError(t, err)

	// Make sure immutable params don't break prebuilt workspace deletion logic
	immutableParam := database.TemplateVersionParameter{
		TemplateVersionID: version.ID,
		Name:              "test",
		Description:       "required & immutable param",
		Type:              "string",
		DefaultValue:      "",
		Required:          true,
		Mutable:           false,
	}
	dbgen.TemplateVersionParameter(t, db, immutableParam)

	return version.ID
}
// Preset optional parameters.
// presetOptions is a function type for modifying InsertPresetParams. Values of
// this type are passed as optional trailing arguments to setupTestDBPreset,
// which applies each of them to the params before the preset is inserted.
type presetOptions func(*database.InsertPresetParams)
// withTTL builds a presetOptions that writes ttl into the InvalidateAfterSecs
// field of InsertPresetParams as a valid (non-NULL) value.
func withTTL(ttl int32) presetOptions {
	invalidateAfter := sql.NullInt32{Int32: ttl, Valid: true}
	return func(params *database.InsertPresetParams) {
		params.InvalidateAfterSecs = invalidateAfter
	}
}
// setupTestDBPreset inserts a preset named presetName for templateVersionID
// with the given number of desired instances, then inserts a single preset
// parameter "test" = "test" for it. Each entry of opts is applied to the
// insert params, in order, before the preset is created. The inserted preset
// is returned.
func setupTestDBPreset(
	t *testing.T,
	db database.Store,
	templateVersionID uuid.UUID,
	desiredInstances int32,
	presetName string,
	opts ...presetOptions,
) database.TemplateVersionPreset {
	t.Helper()

	params := database.InsertPresetParams{
		Name:              presetName,
		TemplateVersionID: templateVersionID,
		DesiredInstances:  sql.NullInt32{Int32: desiredInstances, Valid: true},
	}
	// Let callers override or extend the defaults above (e.g. a TTL).
	for i := range opts {
		opts[i](&params)
	}

	created := dbgen.Preset(t, db, params)
	presetParams := database.InsertPresetParametersParams{
		TemplateVersionPresetID: created.ID,
		Names:                   []string{"test"},
		Values:                  []string{"test"},
	}
	dbgen.PresetParameter(t, db, presetParams)
	return created
}
// setupTestDBPresetWithScheduling inserts a preset named presetName for
// templateVersionID with the given number of desired instances and the given
// scheduling timezone, then inserts a single preset parameter "test" = "test"
// for it. The inserted preset is returned.
func setupTestDBPresetWithScheduling(
	t *testing.T,
	db database.Store,
	templateVersionID uuid.UUID,
	desiredInstances int32,
	presetName string,
	schedulingTimezone string,
) database.TemplateVersionPreset {
	t.Helper()

	params := database.InsertPresetParams{
		Name:               presetName,
		TemplateVersionID:  templateVersionID,
		SchedulingTimezone: schedulingTimezone,
		DesiredInstances:   sql.NullInt32{Int32: desiredInstances, Valid: true},
	}
	created := dbgen.Preset(t, db, params)

	presetParams := database.InsertPresetParametersParams{
		TemplateVersionPresetID: created.ID,
		Names:                   []string{"test"},
		Values:                  []string{"test"},
	}
	dbgen.PresetParameter(t, db, presetParams)
	return created
}
// setupTestDBPrebuild seeds a workspace and workspace build through
// setupTestDBWorkspace, using the prebuilds system user as both the build
// initiator and the workspace owner.
func setupTestDBPrebuild(
	t *testing.T,
	clock quartz.Clock,
	db database.Store,
	ps pubsub.Pubsub,
	transition database.WorkspaceTransition,
	prebuildStatus database.ProvisionerJobStatus,
	orgID uuid.UUID,
	preset database.TemplateVersionPreset,
	templateID uuid.UUID,
	templateVersionID uuid.UUID,
) (database.WorkspaceTable, database.WorkspaceBuild) {
	t.Helper()

	systemUser := database.PrebuildsSystemUserID
	workspace, build := setupTestDBWorkspace(
		t, clock, db, ps,
		transition, prebuildStatus,
		orgID, preset, templateID, templateVersionID,
		systemUser, // initiator
		systemUser, // owner
	)
	return workspace, build
}
// setupTestDBWorkspace inserts a workspace, a provisioner job, a workspace
// build tying the two together (with the given transition, template version
// and preset), and one build parameter "test" = "test".
//
// The job's timestamps are derived from prebuildStatus and the injected clock:
//   - StartedAt is set (muchEarlier) for every status except pending.
//   - CompletedAt is set (earlier) for failed, canceled and succeeded.
//   - CanceledAt is set (earlier) for canceling and canceled.
//   - Error is set to "build failed" for failed.
//
// The workspace and job are created at muchEarlier; the build is created at
// clock.Now(). The inserted workspace and build are returned.
func setupTestDBWorkspace(
	t *testing.T,
	clock quartz.Clock,
	db database.Store,
	ps pubsub.Pubsub,
	transition database.WorkspaceTransition,
	prebuildStatus database.ProvisionerJobStatus,
	orgID uuid.UUID,
	preset database.TemplateVersionPreset,
	templateID uuid.UUID,
	templateVersionID uuid.UUID,
	initiatorID uuid.UUID,
	ownerID uuid.UUID,
) (database.WorkspaceTable, database.WorkspaceBuild) {
	t.Helper()

	var (
		startedAt   sql.NullTime
		completedAt sql.NullTime
		canceledAt  sql.NullTime
		jobError    sql.NullString
	)

	// Anything that is no longer pending has been picked up at some point.
	if prebuildStatus != database.ProvisionerJobStatusPending {
		startedAt = sql.NullTime{Time: clock.Now().Add(muchEarlier), Valid: true}
	}

	switch prebuildStatus {
	case database.ProvisionerJobStatusFailed:
		completedAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
		jobError = sql.NullString{String: "build failed", Valid: true}
	case database.ProvisionerJobStatusCanceling:
		canceledAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
	case database.ProvisionerJobStatusCanceled:
		completedAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
		canceledAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
	case database.ProvisionerJobStatusSucceeded:
		completedAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
	}

	backdated := clock.Now().Add(muchEarlier)

	ws := dbgen.Workspace(t, db, database.WorkspaceTable{
		OwnerID:        ownerID,
		OrganizationID: orgID,
		TemplateID:     templateID,
		CreatedAt:      backdated,
		Deleted:        false,
	})

	provisionerJob := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
		OrganizationID: orgID,
		InitiatorID:    initiatorID,
		CreatedAt:      backdated,
		StartedAt:      startedAt,
		CompletedAt:    completedAt,
		CanceledAt:     canceledAt,
		Error:          jobError,
	})

	build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
		WorkspaceID:             ws.ID,
		JobID:                   provisionerJob.ID,
		InitiatorID:             initiatorID,
		Transition:              transition,
		TemplateVersionID:       templateVersionID,
		TemplateVersionPresetID: uuid.NullUUID{UUID: preset.ID, Valid: true},
		CreatedAt:               clock.Now(),
	})

	buildParams := []database.WorkspaceBuildParameter{{
		WorkspaceBuildID: build.ID,
		Name:             "test",
		Value:            "test",
	}}
	dbgen.WorkspaceBuildParameters(t, db, buildParams)

	return ws, build
}
// setupTestDBWorkspaceAgent inserts a workspace resource on the job of the
// workspace's latest build and an agent on that resource, and returns the
// agent. When eligible is true the agent is moved to the "ready" lifecycle
// state, with StartedAt one minute before ReadyAt.
//
// nolint:revive // It's a control flag, but this is a test.
func setupTestDBWorkspaceAgent(t *testing.T, db database.Store, workspaceID uuid.UUID, eligible bool) database.WorkspaceAgent {
	// Report failures at the caller's line, like the other setupTestDB* helpers.
	t.Helper()

	ctx := t.Context()
	build, err := db.GetLatestWorkspaceBuildByWorkspaceID(ctx, workspaceID)
	require.NoError(t, err)

	res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: build.JobID})
	agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
		ResourceID: res.ID,
	})
	if !eligible {
		return agent
	}

	// A prebuilt workspace is considered eligible when its agent is in a "ready" lifecycle state.
	// i.e. connected to the control plane and all startup scripts have run.
	// A single timestamp is taken so the two fields are exactly one minute apart.
	now := dbtime.Now()
	require.NoError(t, db.UpdateWorkspaceAgentLifecycleStateByID(ctx, database.UpdateWorkspaceAgentLifecycleStateByIDParams{
		ID:             agent.ID,
		LifecycleState: database.WorkspaceAgentLifecycleStateReady,
		StartedAt:      sql.NullTime{Time: now.Add(-time.Minute), Valid: true},
		ReadyAt:        sql.NullTime{Time: now, Valid: true},
	}))
	return agent
}
// setupTestDBPrebuildAntagonists creates test antagonists that should not influence running prebuild workspace tests.
// 1. A stopped prebuilt workspace (START build #1 followed by a more recent
// STOP build #2, owned by the prebuilds system user).
// 2. A running regular workspace (not owned by the prebuilds system user).
//
// Both workspaces live in a dedicated template and template version created
// here inside org, together with a template admin and a regular member.
func setupTestDBPrebuildAntagonists(t *testing.T, db database.Store, ps pubsub.Pubsub, org database.Organization) {
t.Helper()
templateAdmin := dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleTemplateAdmin}})
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{
OrganizationID: org.ID,
UserID: templateAdmin.ID,
})
member := dbgen.User(t, db, database.User{})
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{
OrganizationID: org.ID,
UserID: member.ID,
})
tpl := dbgen.Template(t, db, database.Template{
OrganizationID: org.ID,
CreatedBy: templateAdmin.ID,
})
tv := dbgen.TemplateVersion(t, db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true},
OrganizationID: org.ID,
CreatedBy: templateAdmin.ID,
})
// 1) Stopped prebuilt workspace (owned by prebuilds system user)
stoppedPrebuild := dbgen.Workspace(t, db, database.WorkspaceTable{
OwnerID: database.PrebuildsSystemUserID,
TemplateID: tpl.ID,
Name: "prebuild-antagonist-stopped",
Deleted: false,
})
// STOP build (build number 2, most recent)
// Note: this newer build is inserted before the older START build below;
// the build numbers are set explicitly on both rows.
stoppedJob2 := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
OrganizationID: org.ID,
InitiatorID: database.PrebuildsSystemUserID,
Provisioner: database.ProvisionerTypeEcho,
Type: database.ProvisionerJobTypeWorkspaceBuild,
StartedAt: sql.NullTime{Time: dbtime.Now().Add(-30 * time.Second), Valid: true},
CompletedAt: sql.NullTime{Time: dbtime.Now().Add(-20 * time.Second), Valid: true},
Error: sql.NullString{},
ErrorCode: sql.NullString{},
})
dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
WorkspaceID: stoppedPrebuild.ID,
TemplateVersionID: tv.ID,
JobID: stoppedJob2.ID,
BuildNumber: 2,
Transition: database.WorkspaceTransitionStop,
InitiatorID: database.PrebuildsSystemUserID,
Reason: database.BuildReasonInitiator,
// Explicitly not using a preset here. This shouldn't normally be possible,
// but without this the reconciler will try to create a new prebuild for
// this preset, which will affect the tests.
TemplateVersionPresetID: uuid.NullUUID{},
})
// START build (build number 1, older)
stoppedJob1 := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
OrganizationID: org.ID,
InitiatorID: database.PrebuildsSystemUserID,
Provisioner: database.ProvisionerTypeEcho,
Type: database.ProvisionerJobTypeWorkspaceBuild,
StartedAt: sql.NullTime{Time: dbtime.Now().Add(-60 * time.Second), Valid: true},
CompletedAt: sql.NullTime{Time: dbtime.Now().Add(-50 * time.Second), Valid: true},
Error: sql.NullString{},
ErrorCode: sql.NullString{},
})
dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
WorkspaceID: stoppedPrebuild.ID,
TemplateVersionID: tv.ID,
JobID: stoppedJob1.ID,
BuildNumber: 1,
Transition: database.WorkspaceTransitionStart,
InitiatorID: database.PrebuildsSystemUserID,
Reason: database.BuildReasonInitiator,
})
// 2) Running regular workspace (not owned by prebuilds system user)
regularWorkspace := dbgen.Workspace(t, db, database.WorkspaceTable{
OwnerID: member.ID,
TemplateID: tpl.ID,
Name: "antagonist-regular-workspace",
Deleted: false,
})
// NOTE(review): nil is passed as the pubsub here, unlike the two jobs above
// which pass ps — confirm this is intentional.
regularJob := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
OrganizationID: org.ID,
InitiatorID: member.ID,
Provisioner: database.ProvisionerTypeEcho,
Type: database.ProvisionerJobTypeWorkspaceBuild,
StartedAt: sql.NullTime{Time: dbtime.Now().Add(-40 * time.Second), Valid: true},
CompletedAt: sql.NullTime{Time: dbtime.Now().Add(-30 * time.Second), Valid: true},
Error: sql.NullString{},
ErrorCode: sql.NullString{},
})
dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
WorkspaceID: regularWorkspace.ID,
TemplateVersionID: tv.ID,
JobID: regularJob.ID,
BuildNumber: 1,
Transition: database.WorkspaceTransitionStart,
InitiatorID: member.ID,
Reason: database.BuildReasonInitiator,
})
}
var (
	// allTransitions lists the start, stop, and delete workspace transitions.
	allTransitions = []database.WorkspaceTransition{
		database.WorkspaceTransitionStart,
		database.WorkspaceTransitionStop,
		database.WorkspaceTransitionDelete,
	}

	// allJobStatuses lists the pending, running, succeeded, failed, canceled,
	// and canceling provisioner job statuses.
	allJobStatuses = []database.ProvisionerJobStatus{
		database.ProvisionerJobStatusPending,
		database.ProvisionerJobStatusRunning,
		database.ProvisionerJobStatusSucceeded,
		database.ProvisionerJobStatusFailed,
		database.ProvisionerJobStatusCanceled,
		database.ProvisionerJobStatusCanceling,
	}
)
// allJobStatusesExcept returns the statuses from allJobStatuses that do not
// appear in except.
func allJobStatusesExcept(except ...database.ProvisionerJobStatus) []database.ProvisionerJobStatus {
	// Filter the full status list by the exclusions. With the operands the
	// other way around (filtering except by allJobStatuses) nothing is kept
	// for any valid status, so callers would silently iterate over nothing.
	return slice.Filter(allJobStatuses, func(status database.ProvisionerJobStatus) bool {
		return !slice.Contains(except, status)
	})
}
// mustParseTime parses value using layout via time.Parse and fails the test
// immediately if parsing returns an error.
func mustParseTime(t *testing.T, layout, value string) time.Time {
	t.Helper()

	ts, parseErr := time.Parse(layout, value)
	require.NoError(t, parseErr)

	return ts
}
// TestReconciliationRespectsPauseSetting checks three phases of ReconcileAll
// for a preset that wants two prebuilds: the initial run creates them, a run
// while reconciliation is paused does not recreate them after they are marked
// deleted, and a run after resuming recreates them.
func TestReconciliationRespectsPauseSetting(t *testing.T) {
	t.Parallel()

	ctx := testutil.Context(t, testutil.WaitLong)
	mockClock := quartz.NewMock(t)
	db, ps := dbtestutil.NewDB(t)
	reconcilerCfg := codersdk.PrebuildsConfig{
		ReconciliationInterval: serpent.Duration(testutil.WaitLong),
	}
	log := testutil.Logger(t)
	fileCache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
	reconciler := prebuilds.NewStoreReconciler(
		db, ps, fileCache, reconcilerCfg, log,
		mockClock,
		prometheus.NewRegistry(),
		newNoopEnqueuer(),
		newNoopUsageCheckerPtr(),
		noop.NewTracerProvider(),
		10,
		nil,
	)

	// Seed a template whose version carries a preset asking for two prebuilds.
	org := dbgen.Organization(t, db, database.Organization{})
	owner := dbgen.User(t, db, database.User{})
	tpl := dbgen.Template(t, db, database.Template{
		CreatedBy:      owner.ID,
		OrganizationID: org.ID,
	})
	versionID := setupTestDBTemplateVersion(ctx, t, mockClock, db, ps, org.ID, owner.ID, tpl.ID)
	_ = setupTestDBPreset(t, db, versionID, 2, "test")

	// reconcile runs a single reconciliation pass and requires it to succeed.
	reconcile := func() {
		t.Helper()
		_, reconcileErr := reconciler.ReconcileAll(ctx)
		require.NoError(t, reconcileErr)
	}
	// requireWorkspaceCount asserts how many workspaces
	// GetWorkspacesByTemplateID returns for the template.
	requireWorkspaceCount := func(want int, msg string) {
		t.Helper()
		found, listErr := db.GetWorkspacesByTemplateID(ctx, tpl.ID)
		require.NoError(t, listErr)
		require.Len(t, found, want, msg)
	}

	// Phase 1: an unpaused reconciliation creates the prebuilds.
	reconcile()
	created, err := db.GetWorkspacesByTemplateID(ctx, tpl.ID)
	require.NoError(t, err)
	require.Len(t, created, 2, "should have created 2 prebuilds")

	// Phase 2: pause, then mark the prebuilds deleted so that an unpaused
	// reconciler would normally want to recreate them.
	require.NoError(t, prebuilds.SetPrebuildsReconciliationPaused(ctx, db, true))
	for idx := range created {
		require.NoError(t, db.UpdateWorkspaceDeletedByID(ctx, database.UpdateWorkspaceDeletedByIDParams{
			ID:      created[idx].ID,
			Deleted: true,
		}))
	}
	requireWorkspaceCount(0, "prebuilds should be deleted")

	// A paused reconciliation must leave the template without prebuilds.
	reconcile()
	requireWorkspaceCount(0, "should not create prebuilds when reconciliation is paused")

	// Phase 3: resuming lets the next reconciliation recreate the prebuilds.
	require.NoError(t, prebuilds.SetPrebuildsReconciliationPaused(ctx, db, false))
	reconcile()
	requireWorkspaceCount(2, "should have recreated 2 prebuilds after resuming")
}
// BenchmarkReconcileAll_NoOps benchmarks the reconciliation loop with varying numbers
// of presets of inactive versions that require no reconciliation actions.
//
// This validates the performance benefit of the CanSkipReconciliation optimization,
// which avoids spawning goroutines for presets that don't need reconciliation actions.
//
// go test -bench='^BenchmarkReconcileAll_NoOps$' -run=^$ -benchtime=5x -count=2 ./enterprise/coderd/prebuilds/
func BenchmarkReconcileAll_NoOps(b *testing.B) {
benchCases := []struct {
name string
presetCount int
}{
{"100_presets", 100},
{"1000_presets", 1000},
{"5000_presets", 5000},
}
for _, bc := range benchCases {
b.Run(bc.name, func(b *testing.B) {
// Setup
ctx := context.Background()
logger := slog.Make()
db, ps, sqlDB := dbtestutil.NewDBWithSQLDB(b, dbtestutil.WithLogger(logger))
// Database configuration set per replica (see cli/server.go).
// Default value for CODER_PG_CONN_MAX_OPEN is 10.
maxOpenConns := 10
sqlDB.SetMaxOpenConns(maxOpenConns)
sqlDB.SetMaxIdleConns(3)
clock := quartz.NewMock(b).WithLogger(quartz.NoOpLogger)
cfg := codersdk.PrebuildsConfig{
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
}
prebuildsLogger := slogtest.Make(b, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelError)
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
controller := prebuilds.NewStoreReconciler(
db, ps, cache, cfg, prebuildsLogger,
clock,
prometheus.NewRegistry(),
newNoopEnqueuer(),
newNoopUsageCheckerPtr(),
noop.NewTracerProvider(),
maxOpenConns,
nil,
)
org := dbgen.Organization(b, db, database.Organization{})
user := dbgen.User(b, db, database.User{})
for i := 0; i < bc.presetCount; i++ {
template := dbgen.Template(b, db, database.Template{
CreatedBy: user.ID,
OrganizationID: org.ID,
})
oldTV := dbgen.TemplateVersion(b, db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
OrganizationID: org.ID,
CreatedBy: user.ID,
})
dbgen.Preset(b, db, database.InsertPresetParams{
TemplateVersionID: oldTV.ID,
Name: "default",
DesiredInstances: sql.NullInt32{Int32: 2, Valid: true},
})
// Create new version without preset and make it active
newTV := dbgen.TemplateVersion(b, db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
OrganizationID: org.ID,
CreatedBy: user.ID,
})
err := db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
ID: template.ID,
ActiveVersionID: newTV.ID,
})
require.NoError(b, err)
}
// Verify setup: all presets should be inactive with no work
// Get all presets from all templates
presets, err := db.GetTemplatePresetsWithPrebuilds(ctx, uuid.NullUUID{})
require.NoError(b, err)
require.Len(b, presets, bc.presetCount)
// Should have no prebuilt workspaces
workspaces, err := db.GetWorkspaces(ctx, database.GetWorkspacesParams{
OwnerID: database.PrebuildsSystemUserID,
})
require.NoError(b, err)
require.Empty(b, workspaces)
// Benchmark the reconciliation loop
b.ResetTimer()
for i := 0; i < b.N; i++ {
stats, err := controller.ReconcileAll(ctx)
require.NoError(b, err)
_ = stats
}
})
}
}
// BenchmarkReconcileAll_ConnectionContention benchmarks the reconciliation loop with varying
// levels of database connection contention.
//
// This measures reconciliation time under heavy database load, where each preset
// needs to create multiple prebuilt workspaces.
//
// go test -bench='^BenchmarkReconcileAll_ConnectionContention$' -run=^$ -benchtime=5x -count=2 ./enterprise/coderd/prebuilds/
func BenchmarkReconcileAll_ConnectionContention(b *testing.B) {
benchCases := []struct {
name string
presetsForReconciliation int
desiredInstances int32
}{
{"10_presets_5_instances", 10, 5}, // 50 creates
{"50_presets_5_instances", 50, 5}, // 250 creates
{"100_presets_5_instances", 100, 5}, // 500 creates
{"1000_presets_10_instances", 1000, 10}, // 10000 creates
}
for _, bc := range benchCases {
b.Run(bc.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
// Setup: Create a fresh database for each iteration because ReconcileAll
// creates prebuilds on the first run. Subsequent runs would see those
// prebuilds as "in progress" and skip creating new ones, making the
// benchmark results inconsistent.
ctx := context.Background()
logger := slog.Make()
db, ps, sqlDB := dbtestutil.NewDBWithSQLDB(b, dbtestutil.WithLogger(logger))
// Database configuration set per replica (see cli/server.go).
// Default value for CODER_PG_CONN_MAX_OPEN is 10.
maxOpenConns := 10
sqlDB.SetMaxOpenConns(maxOpenConns)
sqlDB.SetMaxIdleConns(3)
clock := quartz.NewMock(b).WithLogger(quartz.NoOpLogger)
cfg := codersdk.PrebuildsConfig{
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
}
prebuildsLogger := slogtest.Make(b, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelError)
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
controller := prebuilds.NewStoreReconciler(
db, ps, cache, cfg, prebuildsLogger,
clock,
prometheus.NewRegistry(),
newNoopEnqueuer(),
newNoopUsageCheckerPtr(),
noop.NewTracerProvider(),
maxOpenConns,
nil,
)
// Create presets from active template versions that need reconciliation actions
org := dbgen.Organization(b, db, database.Organization{})
user := dbgen.User(b, db, database.User{})
for p := 0; p < bc.presetsForReconciliation; p++ {
template := dbgen.Template(b, db, database.Template{
CreatedBy: user.ID,
OrganizationID: org.ID,
})
// Create a completed provisioner job for the template version.
// This is needed because workspace builds copy the StorageMethod and FileID
// from the template version's import job to know which Terraform files to use.
file := dbgen.File(b, db, database.File{
CreatedBy: user.ID,
Hash: uuid.NewString(), // Generate unique hash for each file
})
templateVersionJob := dbgen.ProvisionerJob(b, db, ps, database.ProvisionerJob{
OrganizationID: org.ID,
InitiatorID: user.ID,
FileID: file.ID,
StorageMethod: database.ProvisionerStorageMethodFile,
Type: database.ProvisionerJobTypeTemplateVersionImport,
CompletedAt: sql.NullTime{Time: clock.Now(), Valid: true},
})
tv := dbgen.TemplateVersion(b, db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
OrganizationID: org.ID,
CreatedBy: user.ID,
JobID: templateVersionJob.ID,
})
dbgen.Preset(b, db, database.InsertPresetParams{
TemplateVersionID: tv.ID,
Name: "default",
DesiredInstances: sql.NullInt32{Int32: bc.desiredInstances, Valid: true},
})
// Make this the active version
err := db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
ID: template.ID,
ActiveVersionID: tv.ID,
})
require.NoError(b, err)
}
// Verify setup: all presets should require reconciliation
// Get all presets from all templates
presets, err := db.GetTemplatePresetsWithPrebuilds(ctx, uuid.NullUUID{})
require.NoError(b, err)
require.Len(b, presets, bc.presetsForReconciliation)
b.StartTimer()
// Measure reconciliation
_, err = controller.ReconcileAll(ctx)
require.NoError(b, err)
b.StopTimer()
}
})
}
}
// BenchmarkReconcileAll_Mix benchmarks reconciliation performance when there are
// many total presets in the database, but only a small subset are active and need reconciliation.
//
// This validates that the reconciler efficiently filters to only active template versions and
// doesn't slow down proportionally with the total number of inactive presets.
//
// go test -bench='^BenchmarkReconcileAll_Mix$' -run=^$ -benchtime=5x -count=2 ./enterprise/coderd/prebuilds/
func BenchmarkReconcileAll_Mix(b *testing.B) {
benchCases := []struct {
name string
inactivePresetsCount int // Presets on inactive template versions (noise)
activePresetsCount int // Presets on active versions that need work
desiredInstances int32 // Desired prebuilds per preset
}{
{"500_total_10_active", 490, 10, 2}, // 20 creates
{"1000_total_25_active", 975, 25, 2}, // 50 creates
{"5000_total_50_active", 4950, 50, 2}, // 100 creates
}
for _, bc := range benchCases {
b.Run(bc.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
// Setup: Create a fresh database for each iteration because ReconcileAll
// creates prebuilds on the first run. Subsequent runs would see those
// prebuilds as "in progress" and skip creating new ones, making the
// benchmark results inconsistent.
ctx := context.Background()
logger := slog.Make()
db, ps, sqlDB := dbtestutil.NewDBWithSQLDB(b, dbtestutil.WithLogger(logger))
// Database configuration set per replica (see cli/server.go).
// Default value for CODER_PG_CONN_MAX_OPEN is 10.
maxOpenConns := 10
sqlDB.SetMaxOpenConns(maxOpenConns)
sqlDB.SetMaxIdleConns(3)
clock := quartz.NewMock(b).WithLogger(quartz.NoOpLogger)
cfg := codersdk.PrebuildsConfig{
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
}
prebuildsLogger := slogtest.Make(b, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelError)
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
controller := prebuilds.NewStoreReconciler(
db, ps, cache, cfg, prebuildsLogger,
clock,
prometheus.NewRegistry(),
newNoopEnqueuer(),
newNoopUsageCheckerPtr(),
noop.NewTracerProvider(),
maxOpenConns,
nil,
)
org := dbgen.Organization(b, db, database.Organization{})
user := dbgen.User(b, db, database.User{})
// Create inactive presets (noise that should be filtered out efficiently)
// These are on templates with inactive versions
for p := 0; p < bc.inactivePresetsCount; p++ {
template := dbgen.Template(b, db, database.Template{
CreatedBy: user.ID,
OrganizationID: org.ID,
})
file := dbgen.File(b, db, database.File{
CreatedBy: user.ID,
Hash: fmt.Sprintf("inactive-%d", p),
})
templateVersionJob := dbgen.ProvisionerJob(b, db, ps, database.ProvisionerJob{
OrganizationID: org.ID,
InitiatorID: user.ID,
FileID: file.ID,
StorageMethod: database.ProvisionerStorageMethodFile,
Type: database.ProvisionerJobTypeTemplateVersionImport,
CompletedAt: sql.NullTime{Time: clock.Now(), Valid: true},
})
inactiveVersion := dbgen.TemplateVersion(b, db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
OrganizationID: org.ID,
CreatedBy: user.ID,
JobID: templateVersionJob.ID,
Name: fmt.Sprintf("inactive-v%d", p),
})
// Create presets on this inactive version
dbgen.Preset(b, db, database.InsertPresetParams{
TemplateVersionID: inactiveVersion.ID,
Name: "default",
DesiredInstances: sql.NullInt32{Int32: 2, Valid: true},
})
// Create a newer active version (making the above version inactive)
newerFile := dbgen.File(b, db, database.File{
CreatedBy: user.ID,
Hash: fmt.Sprintf("active-no-preset-%d", p),
})
newerJob := dbgen.ProvisionerJob(b, db, ps, database.ProvisionerJob{
OrganizationID: org.ID,
InitiatorID: user.ID,
FileID: newerFile.ID,
StorageMethod: database.ProvisionerStorageMethodFile,
Type: database.ProvisionerJobTypeTemplateVersionImport,
CompletedAt: sql.NullTime{Time: clock.Now(), Valid: true},
})
activeVersion := dbgen.TemplateVersion(b, db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
OrganizationID: org.ID,
CreatedBy: user.ID,
JobID: newerJob.ID,
Name: fmt.Sprintf("active-v%d", p),
})
// Make the newer version active (no presets = no reconciliation work)
err := db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
ID: template.ID,
ActiveVersionID: activeVersion.ID,
})
require.NoError(b, err)
}
// Create active presets that need reconciliation (missing prebuilds)
for p := 0; p < bc.activePresetsCount; p++ {
template := dbgen.Template(b, db, database.Template{
CreatedBy: user.ID,
OrganizationID: org.ID,
Name: fmt.Sprintf("needs-work-%d", p),
})
file := dbgen.File(b, db, database.File{
CreatedBy: user.ID,
Hash: fmt.Sprintf("needs-work-%d", p),
})
// Create a completed provisioner job for the template version.
// This is needed because workspace builds copy the StorageMethod and FileID
// from the template version's import job to know which Terraform files to use.
templateVersionJob := dbgen.ProvisionerJob(b, db, ps, database.ProvisionerJob{
OrganizationID: org.ID,
InitiatorID: user.ID,
FileID: file.ID,
StorageMethod: database.ProvisionerStorageMethodFile,
Type: database.ProvisionerJobTypeTemplateVersionImport,
CompletedAt: sql.NullTime{Time: clock.Now(), Valid: true},
})
tv := dbgen.TemplateVersion(b, db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
OrganizationID: org.ID,
CreatedBy: user.ID,
JobID: templateVersionJob.ID,
})
dbgen.Preset(b, db, database.InsertPresetParams{
TemplateVersionID: tv.ID,
Name: "default",
DesiredInstances: sql.NullInt32{Int32: bc.desiredInstances, Valid: true},
})
// Make this the active version
err := db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
ID: template.ID,
ActiveVersionID: tv.ID,
})
require.NoError(b, err)
}
// Verify setup
allPresets, err := db.GetTemplatePresetsWithPrebuilds(ctx, uuid.NullUUID{})
require.NoError(b, err)
totalCount := bc.inactivePresetsCount + bc.activePresetsCount
require.Len(b, allPresets, totalCount, "total preset count should match")
// Count how many are actually active
activeCount := 0
for _, preset := range allPresets {
presetTemplate, err := db.GetTemplateByID(ctx, preset.TemplateID)
require.NoError(b, err)
if presetTemplate.ActiveVersionID == preset.TemplateVersionID {
activeCount++
}
}
require.Equal(b, bc.activePresetsCount, activeCount, "active preset count should match")
b.StartTimer()
// Measure reconciliation: should only process the active presets
_, err = controller.ReconcileAll(ctx)
require.NoError(b, err)
b.StopTimer()
}
})
}
}