mirror of
https://github.com/coder/coder.git
synced 2026-06-07 06:58:17 +00:00
ca234f346d
## Description

- Updates `wsbuilder` to return a `BuildError` with `http.StatusBadRequest` to signify a "validation error" on missing or invalid parameters.
- Adds a short-circuit in `prebuilds.StoreReconciler` to mark presets for which creating a build returns a "validation error" as "validation failed" and skip further attempts to reconcile.
- Adds a test to verify the above.
- Introduces a new Prometheus metric `coderd_prebuilt_workspaces_preset_validation_failed` to track the above.

Closes: https://github.com/coder/coder/issues/21237

---------

Co-authored-by: Cian Johnston <cian@coder.com>
3956 lines
136 KiB
Go
3956 lines
136 KiB
Go
package prebuilds_test
|
||
|
||
import (
|
||
"context"
|
||
"database/sql"
|
||
"fmt"
|
||
"sort"
|
||
"sync"
|
||
"sync/atomic"
|
||
"testing"
|
||
"time"
|
||
|
||
"github.com/google/uuid"
|
||
"github.com/prometheus/client_golang/prometheus"
|
||
"github.com/stretchr/testify/assert"
|
||
"github.com/stretchr/testify/require"
|
||
"go.opentelemetry.io/otel/trace/noop"
|
||
"golang.org/x/xerrors"
|
||
"tailscale.com/types/ptr"
|
||
|
||
"cdr.dev/slog/v3"
|
||
"cdr.dev/slog/v3/sloggers/slogtest"
|
||
"github.com/coder/coder/v2/coderd/coderdtest"
|
||
"github.com/coder/coder/v2/coderd/database"
|
||
"github.com/coder/coder/v2/coderd/database/dbfake"
|
||
"github.com/coder/coder/v2/coderd/database/dbgen"
|
||
"github.com/coder/coder/v2/coderd/database/dbtestutil"
|
||
"github.com/coder/coder/v2/coderd/database/dbtime"
|
||
"github.com/coder/coder/v2/coderd/database/pubsub"
|
||
"github.com/coder/coder/v2/coderd/files"
|
||
"github.com/coder/coder/v2/coderd/notifications"
|
||
"github.com/coder/coder/v2/coderd/notifications/notificationstest"
|
||
"github.com/coder/coder/v2/coderd/rbac"
|
||
"github.com/coder/coder/v2/coderd/util/slice"
|
||
"github.com/coder/coder/v2/coderd/wsbuilder"
|
||
"github.com/coder/coder/v2/codersdk"
|
||
"github.com/coder/coder/v2/enterprise/coderd/prebuilds"
|
||
sdkproto "github.com/coder/coder/v2/provisionersdk/proto"
|
||
"github.com/coder/coder/v2/testutil"
|
||
"github.com/coder/quartz"
|
||
"github.com/coder/serpent"
|
||
)
|
||
|
||
func TestNoReconciliationActionsIfNoPresets(t *testing.T) {
|
||
// Scenario: No reconciliation actions are taken if there are no presets
|
||
t.Parallel()
|
||
|
||
clock := quartz.NewMock(t)
|
||
ctx := testutil.Context(t, testutil.WaitLong)
|
||
db, ps := dbtestutil.NewDB(t)
|
||
cfg := codersdk.PrebuildsConfig{
|
||
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
|
||
}
|
||
logger := testutil.Logger(t)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, ps, cache, cfg, logger,
|
||
quartz.NewMock(t),
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// given a template version with no presets
|
||
org := dbgen.Organization(t, db, database.Organization{})
|
||
user := dbgen.User(t, db, database.User{})
|
||
template := dbgen.Template(t, db, database.Template{
|
||
CreatedBy: user.ID,
|
||
OrganizationID: org.ID,
|
||
})
|
||
templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
|
||
OrganizationID: org.ID,
|
||
CreatedBy: user.ID,
|
||
})
|
||
// verify that the db state is correct
|
||
gotTemplateVersion, err := db.GetTemplateVersionByID(ctx, templateVersion.ID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, templateVersion, gotTemplateVersion)
|
||
|
||
// when we trigger the reconciliation loop for all templates
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// then no reconciliation actions are taken
|
||
// because without presets, there are no prebuilds
|
||
// and without prebuilds, there is nothing to reconcile
|
||
jobs, err := db.GetProvisionerJobsCreatedAfter(ctx, clock.Now().Add(earlier))
|
||
require.NoError(t, err)
|
||
require.Empty(t, jobs)
|
||
}
|
||
|
||
func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) {
|
||
// Scenario: No reconciliation actions are taken if there are no prebuilds
|
||
t.Parallel()
|
||
|
||
clock := quartz.NewMock(t)
|
||
ctx := testutil.Context(t, testutil.WaitLong)
|
||
db, ps := dbtestutil.NewDB(t)
|
||
cfg := codersdk.PrebuildsConfig{
|
||
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
|
||
}
|
||
logger := testutil.Logger(t)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, ps, cache, cfg, logger,
|
||
quartz.NewMock(t),
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// given there are presets, but no prebuilds
|
||
org := dbgen.Organization(t, db, database.Organization{})
|
||
user := dbgen.User(t, db, database.User{})
|
||
template := dbgen.Template(t, db, database.Template{
|
||
CreatedBy: user.ID,
|
||
OrganizationID: org.ID,
|
||
})
|
||
templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
|
||
OrganizationID: org.ID,
|
||
CreatedBy: user.ID,
|
||
})
|
||
preset, err := db.InsertPreset(ctx, database.InsertPresetParams{
|
||
TemplateVersionID: templateVersion.ID,
|
||
Name: "test",
|
||
})
|
||
require.NoError(t, err)
|
||
_, err = db.InsertPresetParameters(ctx, database.InsertPresetParametersParams{
|
||
TemplateVersionPresetID: preset.ID,
|
||
Names: []string{"test"},
|
||
Values: []string{"test"},
|
||
})
|
||
require.NoError(t, err)
|
||
|
||
// verify that the db state is correct
|
||
presetParameters, err := db.GetPresetParametersByTemplateVersionID(ctx, templateVersion.ID)
|
||
require.NoError(t, err)
|
||
require.NotEmpty(t, presetParameters)
|
||
|
||
// when we trigger the reconciliation loop for all templates
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// then no reconciliation actions are taken
|
||
// because without prebuilds, there is nothing to reconcile
|
||
// even if there are presets
|
||
jobs, err := db.GetProvisionerJobsCreatedAfter(ctx, clock.Now().Add(earlier))
|
||
require.NoError(t, err)
|
||
require.Empty(t, jobs)
|
||
}
|
||
|
||
func TestPrebuildReconciliation(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
testScenarios := []testScenario{
|
||
{
|
||
name: "never create prebuilds for inactive template versions",
|
||
prebuildLatestTransitions: allTransitions,
|
||
prebuildJobStatuses: allJobStatuses,
|
||
templateVersionActive: []bool{false},
|
||
shouldCreateNewPrebuild: ptr.To(false),
|
||
templateDeleted: []bool{false},
|
||
},
|
||
{
|
||
name: "no need to create a new prebuild if one is already running",
|
||
prebuildLatestTransitions: []database.WorkspaceTransition{
|
||
database.WorkspaceTransitionStart,
|
||
},
|
||
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
||
database.ProvisionerJobStatusSucceeded,
|
||
},
|
||
templateVersionActive: []bool{true},
|
||
templateDeleted: []bool{false},
|
||
shouldCreateNewPrebuild: ptr.To(false),
|
||
},
|
||
{
|
||
name: "don't create a new prebuild if one is queued to build or already building",
|
||
prebuildLatestTransitions: []database.WorkspaceTransition{
|
||
database.WorkspaceTransitionStart,
|
||
},
|
||
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
||
database.ProvisionerJobStatusPending,
|
||
database.ProvisionerJobStatusRunning,
|
||
},
|
||
templateVersionActive: []bool{true},
|
||
shouldCreateNewPrebuild: ptr.To(false),
|
||
templateDeleted: []bool{false},
|
||
},
|
||
{
|
||
name: "create a new prebuild if one is in a state that disqualifies it from ever being claimed",
|
||
prebuildLatestTransitions: []database.WorkspaceTransition{
|
||
database.WorkspaceTransitionStop,
|
||
database.WorkspaceTransitionDelete,
|
||
},
|
||
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
||
database.ProvisionerJobStatusPending,
|
||
database.ProvisionerJobStatusRunning,
|
||
database.ProvisionerJobStatusCanceling,
|
||
database.ProvisionerJobStatusSucceeded,
|
||
},
|
||
templateVersionActive: []bool{true},
|
||
shouldCreateNewPrebuild: ptr.To(true),
|
||
templateDeleted: []bool{false},
|
||
},
|
||
{
|
||
// See TestFailedBuildBackoff for the start/failed case.
|
||
name: "create a new prebuild if one is in any kind of exceptional state",
|
||
prebuildLatestTransitions: []database.WorkspaceTransition{
|
||
database.WorkspaceTransitionStop,
|
||
database.WorkspaceTransitionDelete,
|
||
},
|
||
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
||
database.ProvisionerJobStatusCanceled,
|
||
},
|
||
templateVersionActive: []bool{true},
|
||
shouldCreateNewPrebuild: ptr.To(true),
|
||
templateDeleted: []bool{false},
|
||
},
|
||
{
|
||
// TODO(ssncferreira): Investigate why the GetRunningPrebuiltWorkspaces query is returning 0 rows.
|
||
// When a template version is inactive (templateVersionActive = false), any prebuilds in the
|
||
// database.ProvisionerJobStatusRunning state should be deleted.
|
||
name: "never attempt to interfere with prebuilds from an active template version",
|
||
// The workspace builder does not allow scheduling a new build if there is already a build
|
||
// pending, running, or canceling. As such, we should never attempt to start, stop or delete
|
||
// such prebuilds. Rather, we should wait for the existing build to complete and reconcile
|
||
// again in the next cycle.
|
||
prebuildLatestTransitions: allTransitions,
|
||
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
||
database.ProvisionerJobStatusPending,
|
||
database.ProvisionerJobStatusRunning,
|
||
database.ProvisionerJobStatusCanceling,
|
||
},
|
||
templateVersionActive: []bool{true},
|
||
shouldDeleteOldPrebuild: ptr.To(false),
|
||
templateDeleted: []bool{false},
|
||
},
|
||
{
|
||
name: "never delete prebuilds in an exceptional state",
|
||
// We don't want to destroy evidence that might be useful to operators
|
||
// when troubleshooting issues. So we leave these prebuilds in place.
|
||
// Operators are expected to manually delete these prebuilds.
|
||
prebuildLatestTransitions: allTransitions,
|
||
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
||
database.ProvisionerJobStatusCanceled,
|
||
database.ProvisionerJobStatusFailed,
|
||
},
|
||
templateVersionActive: []bool{true, false},
|
||
shouldDeleteOldPrebuild: ptr.To(false),
|
||
templateDeleted: []bool{false},
|
||
},
|
||
{
|
||
name: "delete running prebuilds for inactive template versions",
|
||
// We only support prebuilds for active template versions.
|
||
// If a template version is inactive, we should delete any prebuilds
|
||
// that are running.
|
||
prebuildLatestTransitions: []database.WorkspaceTransition{
|
||
database.WorkspaceTransitionStart,
|
||
},
|
||
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
||
database.ProvisionerJobStatusSucceeded,
|
||
},
|
||
templateVersionActive: []bool{false},
|
||
shouldDeleteOldPrebuild: ptr.To(true),
|
||
templateDeleted: []bool{false},
|
||
},
|
||
{
|
||
name: "don't delete running prebuilds for active template versions",
|
||
prebuildLatestTransitions: []database.WorkspaceTransition{
|
||
database.WorkspaceTransitionStart,
|
||
},
|
||
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
||
database.ProvisionerJobStatusSucceeded,
|
||
},
|
||
templateVersionActive: []bool{true},
|
||
shouldDeleteOldPrebuild: ptr.To(false),
|
||
templateDeleted: []bool{false},
|
||
},
|
||
{
|
||
name: "don't delete stopped or already deleted prebuilds",
|
||
// We don't ever stop prebuilds. A stopped prebuild is an exceptional state.
|
||
// As such we keep it, to allow operators to investigate the cause.
|
||
prebuildLatestTransitions: []database.WorkspaceTransition{
|
||
database.WorkspaceTransitionStop,
|
||
database.WorkspaceTransitionDelete,
|
||
},
|
||
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
||
database.ProvisionerJobStatusSucceeded,
|
||
},
|
||
templateVersionActive: []bool{true, false},
|
||
shouldDeleteOldPrebuild: ptr.To(false),
|
||
templateDeleted: []bool{false},
|
||
},
|
||
{
|
||
// Templates can be soft-deleted (`deleted=true`) or hard-deleted (row is removed).
|
||
// On the former there is *no* DB constraint to prevent soft deletion, so we have to ensure that if somehow
|
||
// the template was soft-deleted any running prebuilds will be removed.
|
||
// On the latter there is a DB constraint to prevent row deletion if any workspaces reference the deleting template.
|
||
name: "soft-deleted templates MAY have prebuilds",
|
||
prebuildLatestTransitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart},
|
||
prebuildJobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded},
|
||
templateVersionActive: []bool{true, false},
|
||
shouldCreateNewPrebuild: ptr.To(false),
|
||
shouldDeleteOldPrebuild: ptr.To(true),
|
||
templateDeleted: []bool{true},
|
||
},
|
||
}
|
||
for _, tc := range testScenarios {
|
||
testCases := tc.testCases()
|
||
for _, tc := range testCases {
|
||
tc.run(t)
|
||
}
|
||
}
|
||
}
|
||
|
||
// testScenario is a collection of test cases that illustrate the same business rule.
|
||
// A testScenario describes a set of test properties for which the same test expecations
|
||
// hold. A testScenario may be decomposed into multiple testCase structs, which can then be run.
|
||
type testScenario struct {
|
||
name string
|
||
prebuildLatestTransitions []database.WorkspaceTransition
|
||
prebuildJobStatuses []database.ProvisionerJobStatus
|
||
templateVersionActive []bool
|
||
templateDeleted []bool
|
||
shouldCreateNewPrebuild *bool
|
||
shouldDeleteOldPrebuild *bool
|
||
expectOrgMembership *bool
|
||
expectGroupMembership *bool
|
||
}
|
||
|
||
func (ts testScenario) testCases() []testCase {
|
||
testCases := []testCase{}
|
||
for _, templateVersionActive := range ts.templateVersionActive {
|
||
for _, prebuildLatestTransition := range ts.prebuildLatestTransitions {
|
||
for _, prebuildJobStatus := range ts.prebuildJobStatuses {
|
||
for _, templateDeleted := range ts.templateDeleted {
|
||
for _, useBrokenPubsub := range []bool{true, false} {
|
||
testCase := testCase{
|
||
name: ts.name,
|
||
templateVersionActive: templateVersionActive,
|
||
prebuildLatestTransition: prebuildLatestTransition,
|
||
prebuildJobStatus: prebuildJobStatus,
|
||
templateDeleted: templateDeleted,
|
||
useBrokenPubsub: useBrokenPubsub,
|
||
shouldCreateNewPrebuild: ts.shouldCreateNewPrebuild,
|
||
shouldDeleteOldPrebuild: ts.shouldDeleteOldPrebuild,
|
||
expectOrgMembership: ts.expectOrgMembership,
|
||
expectGroupMembership: ts.expectGroupMembership,
|
||
}
|
||
testCases = append(testCases, testCase)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
return testCases
|
||
}
|
||
|
||
type testCase struct {
|
||
name string
|
||
prebuildLatestTransition database.WorkspaceTransition
|
||
prebuildJobStatus database.ProvisionerJobStatus
|
||
templateVersionActive bool
|
||
templateDeleted bool
|
||
useBrokenPubsub bool
|
||
shouldCreateNewPrebuild *bool
|
||
shouldDeleteOldPrebuild *bool
|
||
expectOrgMembership *bool
|
||
expectGroupMembership *bool
|
||
}
|
||
|
||
func (tc testCase) run(t *testing.T) {
|
||
t.Run(tc.name, func(t *testing.T) {
|
||
t.Parallel()
|
||
t.Cleanup(func() {
|
||
if t.Failed() {
|
||
t.Logf("failed to run test: %s", tc.name)
|
||
t.Logf("templateVersionActive: %t", tc.templateVersionActive)
|
||
t.Logf("prebuildLatestTransition: %s", tc.prebuildLatestTransition)
|
||
t.Logf("prebuildJobStatus: %s", tc.prebuildJobStatus)
|
||
}
|
||
})
|
||
clock := quartz.NewMock(t)
|
||
ctx := testutil.Context(t, testutil.WaitShort)
|
||
cfg := codersdk.PrebuildsConfig{}
|
||
logger := slogtest.Make(
|
||
t, &slogtest.Options{IgnoreErrors: true},
|
||
).Leveled(slog.LevelDebug)
|
||
db, pubSub := dbtestutil.NewDB(t)
|
||
|
||
ownerID := uuid.New()
|
||
dbgen.User(t, db, database.User{
|
||
ID: ownerID,
|
||
})
|
||
org, template := setupTestDBTemplate(t, db, ownerID, tc.templateDeleted)
|
||
templateVersionID := setupTestDBTemplateVersion(
|
||
ctx,
|
||
t,
|
||
clock,
|
||
db,
|
||
pubSub,
|
||
org.ID,
|
||
ownerID,
|
||
template.ID,
|
||
)
|
||
preset := setupTestDBPreset(
|
||
t,
|
||
db,
|
||
templateVersionID,
|
||
1,
|
||
uuid.New().String(),
|
||
)
|
||
prebuild, _ := setupTestDBPrebuild(
|
||
t,
|
||
clock,
|
||
db,
|
||
pubSub,
|
||
tc.prebuildLatestTransition,
|
||
tc.prebuildJobStatus,
|
||
org.ID,
|
||
preset,
|
||
template.ID,
|
||
templateVersionID,
|
||
)
|
||
|
||
setupTestDBPrebuildAntagonists(t, db, pubSub, org)
|
||
|
||
if !tc.templateVersionActive {
|
||
// Create a new template version and mark it as active
|
||
// This marks the template version that we care about as inactive
|
||
setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
|
||
}
|
||
|
||
if tc.useBrokenPubsub {
|
||
pubSub = &brokenPublisher{Pubsub: pubSub}
|
||
}
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, pubSub, cache, cfg, logger,
|
||
quartz.NewMock(t),
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// Run the reconciliation multiple times to ensure idempotency
|
||
// 8 was arbitrary, but large enough to reasonably trust the result
|
||
for i := 1; i <= 8; i++ {
|
||
_, err := controller.ReconcileAll(ctx)
|
||
require.NoErrorf(t, err, "failed on iteration %d", i)
|
||
|
||
if tc.shouldCreateNewPrebuild != nil {
|
||
newPrebuildCount := 0
|
||
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
for _, workspace := range workspaces {
|
||
if workspace.ID != prebuild.ID {
|
||
newPrebuildCount++
|
||
}
|
||
}
|
||
// This test configures a preset that desires one prebuild.
|
||
// In cases where new prebuilds should be created, there should be exactly one.
|
||
require.Equal(t, *tc.shouldCreateNewPrebuild, newPrebuildCount == 1)
|
||
}
|
||
|
||
if tc.shouldDeleteOldPrebuild != nil {
|
||
builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
|
||
WorkspaceID: prebuild.ID,
|
||
})
|
||
require.NoError(t, err)
|
||
if *tc.shouldDeleteOldPrebuild {
|
||
require.Equal(t, 2, len(builds))
|
||
require.Equal(t, database.WorkspaceTransitionDelete, builds[0].Transition)
|
||
} else {
|
||
require.Equal(t, 1, len(builds))
|
||
require.Equal(t, tc.prebuildLatestTransition, builds[0].Transition)
|
||
}
|
||
}
|
||
}
|
||
})
|
||
}
|
||
|
||
// brokenPublisher is used to validate that Publish() calls which always fail do not affect the reconciler's behavior,
|
||
// since the messages published are not essential but merely advisory.
|
||
type brokenPublisher struct {
|
||
pubsub.Pubsub
|
||
}
|
||
|
||
// Publish deliberately fails.
|
||
// I'm explicitly _not_ checking for EventJobPosted (coderd/database/provisionerjobs/provisionerjobs.go) since that
|
||
// requires too much knowledge of the underlying implementation.
|
||
func (*brokenPublisher) Publish(event string, _ []byte) error {
|
||
// Mimick some work being done.
|
||
<-time.After(testutil.IntervalFast)
|
||
return xerrors.Errorf("failed to publish %q", event)
|
||
}
|
||
|
||
// prebuildStoreWrapper wraps database.Store to inject errors for testing.
|
||
type prebuildStoreWrapper struct {
|
||
database.Store
|
||
insertProvisionerJobErr error
|
||
errorOnTemplateVersionID uuid.UUID
|
||
}
|
||
|
||
func (s prebuildStoreWrapper) InsertProvisionerJob(ctx context.Context, arg database.InsertProvisionerJobParams) (database.ProvisionerJob, error) {
|
||
if s.insertProvisionerJobErr != nil {
|
||
return database.ProvisionerJob{}, s.insertProvisionerJobErr
|
||
}
|
||
return s.Store.InsertProvisionerJob(ctx, arg)
|
||
}
|
||
|
||
func (s prebuildStoreWrapper) InsertWorkspaceBuild(ctx context.Context, arg database.InsertWorkspaceBuildParams) error {
|
||
if s.errorOnTemplateVersionID != uuid.Nil && arg.TemplateVersionID == s.errorOnTemplateVersionID {
|
||
return xerrors.Errorf("injected internal server error for template version %s", s.errorOnTemplateVersionID)
|
||
}
|
||
return s.Store.InsertWorkspaceBuild(ctx, arg)
|
||
}
|
||
|
||
func (s prebuildStoreWrapper) InTx(fn func(database.Store) error, opts *database.TxOptions) error {
|
||
return s.Store.InTx(func(tx database.Store) error {
|
||
return fn(prebuildStoreWrapper{
|
||
Store: tx,
|
||
insertProvisionerJobErr: s.insertProvisionerJobErr,
|
||
errorOnTemplateVersionID: s.errorOnTemplateVersionID,
|
||
})
|
||
}, opts)
|
||
}
|
||
|
||
func TestMultiplePresetsPerTemplateVersion(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
prebuildLatestTransition := database.WorkspaceTransitionStart
|
||
prebuildJobStatus := database.ProvisionerJobStatusRunning
|
||
templateDeleted := false
|
||
|
||
clock := quartz.NewMock(t)
|
||
ctx := testutil.Context(t, testutil.WaitShort)
|
||
cfg := codersdk.PrebuildsConfig{}
|
||
logger := slogtest.Make(
|
||
t, &slogtest.Options{IgnoreErrors: true},
|
||
).Leveled(slog.LevelDebug)
|
||
db, pubSub := dbtestutil.NewDB(t)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, pubSub, cache, cfg, logger,
|
||
quartz.NewMock(t),
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
ownerID := uuid.New()
|
||
dbgen.User(t, db, database.User{
|
||
ID: ownerID,
|
||
})
|
||
org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
|
||
templateVersionID := setupTestDBTemplateVersion(
|
||
ctx,
|
||
t,
|
||
clock,
|
||
db,
|
||
pubSub,
|
||
org.ID,
|
||
ownerID,
|
||
template.ID,
|
||
)
|
||
preset := setupTestDBPreset(
|
||
t,
|
||
db,
|
||
templateVersionID,
|
||
4,
|
||
uuid.New().String(),
|
||
)
|
||
preset2 := setupTestDBPreset(
|
||
t,
|
||
db,
|
||
templateVersionID,
|
||
10,
|
||
uuid.New().String(),
|
||
)
|
||
prebuildIDs := make([]uuid.UUID, 0)
|
||
for i := 0; i < int(preset.DesiredInstances.Int32); i++ {
|
||
prebuild, _ := setupTestDBPrebuild(
|
||
t,
|
||
clock,
|
||
db,
|
||
pubSub,
|
||
prebuildLatestTransition,
|
||
prebuildJobStatus,
|
||
org.ID,
|
||
preset,
|
||
template.ID,
|
||
templateVersionID,
|
||
)
|
||
prebuildIDs = append(prebuildIDs, prebuild.ID)
|
||
}
|
||
|
||
// Run the reconciliation multiple times to ensure idempotency
|
||
// 8 was arbitrary, but large enough to reasonably trust the result
|
||
for i := 1; i <= 8; i++ {
|
||
_, err := controller.ReconcileAll(ctx)
|
||
require.NoErrorf(t, err, "failed on iteration %d", i)
|
||
|
||
newPrebuildCount := 0
|
||
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
for _, workspace := range workspaces {
|
||
if slice.Contains(prebuildIDs, workspace.ID) {
|
||
continue
|
||
}
|
||
newPrebuildCount++
|
||
}
|
||
|
||
// NOTE: preset1 doesn't block creation of instances in preset2
|
||
require.Equal(t, preset2.DesiredInstances.Int32, int32(newPrebuildCount)) // nolint:gosec
|
||
}
|
||
}
|
||
|
||
func TestPrebuildScheduling(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
templateDeleted := false
|
||
|
||
// The test includes 2 presets, each with 2 schedules.
|
||
// It checks that the number of created prebuilds match expectations for various provided times,
|
||
// based on the corresponding schedules.
|
||
testCases := []struct {
|
||
name string
|
||
// now specifies the current time.
|
||
now time.Time
|
||
// expected prebuild counts for preset1 and preset2, respectively.
|
||
expectedPrebuildCounts []int
|
||
}{
|
||
{
|
||
name: "Before the 1st schedule",
|
||
now: mustParseTime(t, time.RFC1123, "Mon, 02 Jun 2025 01:00:00 UTC"),
|
||
expectedPrebuildCounts: []int{1, 1},
|
||
},
|
||
{
|
||
name: "1st schedule",
|
||
now: mustParseTime(t, time.RFC1123, "Mon, 02 Jun 2025 03:00:00 UTC"),
|
||
expectedPrebuildCounts: []int{2, 1},
|
||
},
|
||
{
|
||
name: "2nd schedule",
|
||
now: mustParseTime(t, time.RFC1123, "Mon, 02 Jun 2025 07:00:00 UTC"),
|
||
expectedPrebuildCounts: []int{3, 1},
|
||
},
|
||
{
|
||
name: "3rd schedule",
|
||
now: mustParseTime(t, time.RFC1123, "Mon, 02 Jun 2025 11:00:00 UTC"),
|
||
expectedPrebuildCounts: []int{1, 4},
|
||
},
|
||
{
|
||
name: "4th schedule",
|
||
now: mustParseTime(t, time.RFC1123, "Mon, 02 Jun 2025 15:00:00 UTC"),
|
||
expectedPrebuildCounts: []int{1, 5},
|
||
},
|
||
}
|
||
|
||
for _, tc := range testCases {
|
||
t.Run(tc.name, func(t *testing.T) {
|
||
t.Parallel()
|
||
clock := quartz.NewMock(t)
|
||
clock.Set(tc.now)
|
||
ctx := testutil.Context(t, testutil.WaitShort)
|
||
cfg := codersdk.PrebuildsConfig{}
|
||
logger := slogtest.Make(
|
||
t, &slogtest.Options{IgnoreErrors: true},
|
||
).Leveled(slog.LevelDebug)
|
||
db, pubSub := dbtestutil.NewDB(t)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, pubSub, cache, cfg, logger,
|
||
clock,
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
ownerID := uuid.New()
|
||
dbgen.User(t, db, database.User{
|
||
ID: ownerID,
|
||
})
|
||
org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
|
||
templateVersionID := setupTestDBTemplateVersion(
|
||
ctx,
|
||
t,
|
||
clock,
|
||
db,
|
||
pubSub,
|
||
org.ID,
|
||
ownerID,
|
||
template.ID,
|
||
)
|
||
preset1 := setupTestDBPresetWithScheduling(
|
||
t,
|
||
db,
|
||
templateVersionID,
|
||
1,
|
||
uuid.New().String(),
|
||
"UTC",
|
||
)
|
||
preset2 := setupTestDBPresetWithScheduling(
|
||
t,
|
||
db,
|
||
templateVersionID,
|
||
1,
|
||
uuid.New().String(),
|
||
"UTC",
|
||
)
|
||
|
||
dbgen.PresetPrebuildSchedule(t, db, database.InsertPresetPrebuildScheduleParams{
|
||
PresetID: preset1.ID,
|
||
CronExpression: "* 2-4 * * 1-5",
|
||
DesiredInstances: 2,
|
||
})
|
||
dbgen.PresetPrebuildSchedule(t, db, database.InsertPresetPrebuildScheduleParams{
|
||
PresetID: preset1.ID,
|
||
CronExpression: "* 6-8 * * 1-5",
|
||
DesiredInstances: 3,
|
||
})
|
||
dbgen.PresetPrebuildSchedule(t, db, database.InsertPresetPrebuildScheduleParams{
|
||
PresetID: preset2.ID,
|
||
CronExpression: "* 10-12 * * 1-5",
|
||
DesiredInstances: 4,
|
||
})
|
||
dbgen.PresetPrebuildSchedule(t, db, database.InsertPresetPrebuildScheduleParams{
|
||
PresetID: preset2.ID,
|
||
CronExpression: "* 14-16 * * 1-5",
|
||
DesiredInstances: 5,
|
||
})
|
||
|
||
_, err := controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// get workspace builds
|
||
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
workspaceIDs := make([]uuid.UUID, 0, len(workspaces))
|
||
for _, workspace := range workspaces {
|
||
workspaceIDs = append(workspaceIDs, workspace.ID)
|
||
}
|
||
workspaceBuilds, err := db.GetLatestWorkspaceBuildsByWorkspaceIDs(ctx, workspaceIDs)
|
||
require.NoError(t, err)
|
||
|
||
// calculate number of workspace builds per preset
|
||
var (
|
||
preset1PrebuildCount int
|
||
preset2PrebuildCount int
|
||
)
|
||
for _, workspaceBuild := range workspaceBuilds {
|
||
if preset1.ID == workspaceBuild.TemplateVersionPresetID.UUID {
|
||
preset1PrebuildCount++
|
||
}
|
||
if preset2.ID == workspaceBuild.TemplateVersionPresetID.UUID {
|
||
preset2PrebuildCount++
|
||
}
|
||
}
|
||
|
||
require.Equal(t, tc.expectedPrebuildCounts[0], preset1PrebuildCount)
|
||
require.Equal(t, tc.expectedPrebuildCounts[1], preset2PrebuildCount)
|
||
})
|
||
}
|
||
}
|
||
|
||
func TestInvalidPreset(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
templateDeleted := false
|
||
|
||
clock := quartz.NewMock(t)
|
||
ctx := testutil.Context(t, testutil.WaitShort)
|
||
cfg := codersdk.PrebuildsConfig{}
|
||
logger := slogtest.Make(
|
||
t, &slogtest.Options{IgnoreErrors: true},
|
||
).Leveled(slog.LevelDebug)
|
||
db, pubSub := dbtestutil.NewDB(t)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, pubSub, cache, cfg, logger,
|
||
quartz.NewMock(t),
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
ownerID := uuid.New()
|
||
dbgen.User(t, db, database.User{
|
||
ID: ownerID,
|
||
})
|
||
org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
|
||
templateVersionID := setupTestDBTemplateVersion(
|
||
ctx,
|
||
t,
|
||
clock,
|
||
db,
|
||
pubSub,
|
||
org.ID,
|
||
ownerID,
|
||
template.ID,
|
||
)
|
||
// Add required param, which is not set in preset. It means that creating of prebuild will constantly fail.
|
||
dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
|
||
TemplateVersionID: templateVersionID,
|
||
Name: "required-param",
|
||
Description: "required param to make sure creating prebuild will fail",
|
||
Type: "bool",
|
||
DefaultValue: "",
|
||
Required: true,
|
||
})
|
||
setupTestDBPreset(
|
||
t,
|
||
db,
|
||
templateVersionID,
|
||
1,
|
||
uuid.New().String(),
|
||
)
|
||
|
||
// Run the reconciliation multiple times to ensure idempotency
|
||
// 8 was arbitrary, but large enough to reasonably trust the result
|
||
for i := 1; i <= 8; i++ {
|
||
_, err := controller.ReconcileAll(ctx)
|
||
require.NoErrorf(t, err, "failed on iteration %d", i)
|
||
|
||
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
newPrebuildCount := len(workspaces)
|
||
|
||
// NOTE: we don't have any new prebuilds, because their creation constantly fails.
|
||
require.Equal(t, int32(0), int32(newPrebuildCount)) // nolint:gosec
|
||
}
|
||
}
|
||
|
||
func TestDeletionOfPrebuiltWorkspaceWithInvalidPreset(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
templateDeleted := false
|
||
|
||
clock := quartz.NewMock(t)
|
||
ctx := testutil.Context(t, testutil.WaitShort)
|
||
cfg := codersdk.PrebuildsConfig{}
|
||
logger := slogtest.Make(
|
||
t, &slogtest.Options{IgnoreErrors: true},
|
||
).Leveled(slog.LevelDebug)
|
||
db, pubSub := dbtestutil.NewDB(t)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, pubSub, cache, cfg, logger,
|
||
quartz.NewMock(t),
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
ownerID := uuid.New()
|
||
dbgen.User(t, db, database.User{
|
||
ID: ownerID,
|
||
})
|
||
org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
|
||
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
|
||
preset := setupTestDBPreset(t, db, templateVersionID, 1, uuid.New().String())
|
||
prebuiltWorkspace, _ := setupTestDBPrebuild(
|
||
t,
|
||
clock,
|
||
db,
|
||
pubSub,
|
||
database.WorkspaceTransitionStart,
|
||
database.ProvisionerJobStatusSucceeded,
|
||
org.ID,
|
||
preset,
|
||
template.ID,
|
||
templateVersionID,
|
||
)
|
||
|
||
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
// make sure we have only one workspace
|
||
require.Equal(t, 1, len(workspaces))
|
||
|
||
// Create a new template version and mark it as active.
|
||
// This marks the previous template version as inactive.
|
||
templateVersionID = setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
|
||
// Add required param, which is not set in preset.
|
||
// It means that creating of new prebuilt workspace will fail, but we should be able to clean up old prebuilt workspaces.
|
||
dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
|
||
TemplateVersionID: templateVersionID,
|
||
Name: "required-param",
|
||
Description: "required param which isn't set in preset",
|
||
Type: "bool",
|
||
DefaultValue: "",
|
||
Required: true,
|
||
})
|
||
|
||
// Old prebuilt workspace should be deleted.
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
|
||
WorkspaceID: prebuiltWorkspace.ID,
|
||
})
|
||
require.NoError(t, err)
|
||
// Make sure old prebuild workspace was deleted, despite it contains required parameter which isn't set in preset.
|
||
require.Equal(t, 2, len(builds))
|
||
require.Equal(t, database.WorkspaceTransitionDelete, builds[0].Transition)
|
||
}
|
||
|
||
func TestSkippingHardLimitedPresets(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
// Test cases verify the behavior of prebuild creation depending on configured failure limits.
|
||
testCases := []struct {
|
||
name string
|
||
hardLimit int64
|
||
isHardLimitHit bool
|
||
}{
|
||
{
|
||
name: "hard limit is hit - skip creation of prebuilt workspace",
|
||
hardLimit: 1,
|
||
isHardLimitHit: true,
|
||
},
|
||
{
|
||
name: "hard limit is not hit - try to create prebuilt workspace again",
|
||
hardLimit: 2,
|
||
isHardLimitHit: false,
|
||
},
|
||
}
|
||
|
||
for _, tc := range testCases {
|
||
t.Run(tc.name, func(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
templateDeleted := false
|
||
|
||
clock := quartz.NewMock(t)
|
||
ctx := testutil.Context(t, testutil.WaitShort)
|
||
cfg := codersdk.PrebuildsConfig{
|
||
FailureHardLimit: serpent.Int64(tc.hardLimit),
|
||
ReconciliationBackoffInterval: 0,
|
||
}
|
||
logger := slogtest.Make(
|
||
t, &slogtest.Options{IgnoreErrors: true},
|
||
).Leveled(slog.LevelDebug)
|
||
db, pubSub := dbtestutil.NewDB(t)
|
||
fakeEnqueuer := newFakeEnqueuer()
|
||
registry := prometheus.NewRegistry()
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, pubSub, cache, cfg, logger,
|
||
clock,
|
||
registry,
|
||
fakeEnqueuer,
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// Set up test environment with a template, version, and preset.
|
||
ownerID := uuid.New()
|
||
dbgen.User(t, db, database.User{
|
||
ID: ownerID,
|
||
})
|
||
org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
|
||
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
|
||
preset := setupTestDBPreset(t, db, templateVersionID, 1, uuid.New().String())
|
||
|
||
// Create a failed prebuild workspace that counts toward the hard failure limit.
|
||
setupTestDBPrebuild(
|
||
t,
|
||
clock,
|
||
db,
|
||
pubSub,
|
||
database.WorkspaceTransitionStart,
|
||
database.ProvisionerJobStatusFailed,
|
||
org.ID,
|
||
preset,
|
||
template.ID,
|
||
templateVersionID,
|
||
)
|
||
|
||
// Verify initial state: one failed workspace exists.
|
||
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
workspaceCount := len(workspaces)
|
||
require.Equal(t, 1, workspaceCount)
|
||
|
||
// Verify initial state: metric is not set - meaning preset is not hard limited.
|
||
require.NoError(t, controller.ForceMetricsUpdate(ctx))
|
||
mf, err := registry.Gather()
|
||
require.NoError(t, err)
|
||
metric := findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{
|
||
"template_name": template.Name,
|
||
"preset_name": preset.Name,
|
||
"organization_name": org.Name,
|
||
})
|
||
require.Nil(t, metric)
|
||
|
||
// We simulate a failed prebuild in the test; Consequently, the backoff mechanism is triggered when ReconcileAll is called.
|
||
// Even though ReconciliationBackoffInterval is set to zero, we still need to advance the clock by at least one nanosecond.
|
||
clock.Advance(time.Nanosecond).MustWait(ctx)
|
||
|
||
// Trigger reconciliation to attempt creating a new prebuild.
|
||
// The outcome depends on whether the hard limit has been reached.
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// These two additional calls to ReconcileAll should not trigger any notifications.
|
||
// A notification is only sent once.
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// Verify the final state after reconciliation.
|
||
workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
updatedPreset, err := db.GetPresetByID(ctx, preset.ID)
|
||
require.NoError(t, err)
|
||
|
||
if !tc.isHardLimitHit {
|
||
// When hard limit is not reached, a new workspace should be created.
|
||
require.Equal(t, 2, len(workspaces))
|
||
require.Equal(t, database.PrebuildStatusHealthy, updatedPreset.PrebuildStatus)
|
||
|
||
// When hard limit is not reached, metric is not set.
|
||
mf, err = registry.Gather()
|
||
require.NoError(t, err)
|
||
metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{
|
||
"template_name": template.Name,
|
||
"preset_name": preset.Name,
|
||
"organization_name": org.Name,
|
||
})
|
||
require.Nil(t, metric)
|
||
return
|
||
}
|
||
|
||
// When hard limit is reached, no new workspace should be created.
|
||
require.Equal(t, 1, len(workspaces))
|
||
require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)
|
||
|
||
// When hard limit is reached, metric is set to 1.
|
||
mf, err = registry.Gather()
|
||
require.NoError(t, err)
|
||
metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{
|
||
"template_name": template.Name,
|
||
"preset_name": preset.Name,
|
||
"organization_name": org.Name,
|
||
})
|
||
require.NotNil(t, metric)
|
||
require.NotNil(t, metric.GetGauge())
|
||
require.EqualValues(t, 1, metric.GetGauge().GetValue())
|
||
})
|
||
}
|
||
}
|
||
|
||
func TestValidationFailedPresets(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
// This test uses 5 presets sharing one DB to verify validation_failed behavior:
|
||
// | Preset | Setup | Expected After Reconcile |
|
||
// |--------|-----------------------------------------|-------------------------------------------|
|
||
// | A | Already validation_failed, desired=2 | Stays validation_failed, 0 workspaces |
|
||
// | B | Healthy, required param missing | Marked validation_failed, 0 workspaces |
|
||
// | C | Healthy, desired=3, required param | Marked validation_failed, 0 workspaces |
|
||
// | D | Healthy, DB wrapper injects 500 | Stays healthy, 0 workspaces |
|
||
// | E | Healthy, desired=1 (control) | Stays healthy, 1 workspaces |
|
||
|
||
clock := quartz.NewMock(t)
|
||
ctx := testutil.Context(t, testutil.WaitMedium)
|
||
cfg := codersdk.PrebuildsConfig{}
|
||
logger := slogtest.Make(
|
||
t, &slogtest.Options{IgnoreErrors: true},
|
||
).Leveled(slog.LevelDebug)
|
||
db, pubSub := dbtestutil.NewDB(t)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
registry := prometheus.NewRegistry()
|
||
|
||
// Set up shared test environment.
|
||
ownerID := uuid.New()
|
||
dbgen.User(t, db, database.User{
|
||
ID: ownerID,
|
||
})
|
||
org := dbgen.Organization(t, db, database.Organization{})
|
||
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{
|
||
OrganizationID: org.ID,
|
||
UserID: ownerID,
|
||
})
|
||
|
||
// Helper to create template + version + optional required param.
|
||
createTemplate := func(name string, addRequiredParam bool) (database.Template, database.TemplateVersion) {
|
||
// First create the template (with a placeholder ActiveVersionID that we'll update).
|
||
tpl := dbgen.Template(t, db, database.Template{
|
||
OrganizationID: org.ID,
|
||
CreatedBy: ownerID,
|
||
Name: name,
|
||
})
|
||
|
||
// Now create the provisioner job and template version linked to the template.
|
||
job := dbgen.ProvisionerJob(t, db, pubSub, database.ProvisionerJob{
|
||
OrganizationID: org.ID,
|
||
CompletedAt: sql.NullTime{Time: clock.Now().Add(earlier), Valid: true},
|
||
InitiatorID: ownerID,
|
||
})
|
||
tv := dbgen.TemplateVersion(t, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true},
|
||
OrganizationID: org.ID,
|
||
JobID: job.ID,
|
||
CreatedBy: ownerID,
|
||
})
|
||
|
||
// Update template to point to this version as active.
|
||
require.NoError(t, db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
|
||
ID: tpl.ID,
|
||
ActiveVersionID: tv.ID,
|
||
}))
|
||
|
||
if addRequiredParam {
|
||
dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
|
||
TemplateVersionID: tv.ID,
|
||
Name: "required_param",
|
||
Description: "required param to trigger validation failure",
|
||
Type: "bool",
|
||
DefaultValue: "",
|
||
Required: true,
|
||
})
|
||
}
|
||
return tpl, tv
|
||
}
|
||
|
||
// Create templates.
|
||
tplA, tvA := createTemplate("tpl-already-failed", false)
|
||
tplB, tvB := createTemplate("tpl-will-400", true)
|
||
tplC, tvC := createTemplate("tpl-multi-create", true)
|
||
tplD, tvD := createTemplate("tpl-will-500", false)
|
||
tplE, tvE := createTemplate("tpl-control", false)
|
||
|
||
// Create presets.
|
||
presetA := dbgen.Preset(t, db, database.InsertPresetParams{
|
||
TemplateVersionID: tvA.ID,
|
||
Name: "preset-already-failed",
|
||
DesiredInstances: sql.NullInt32{Int32: 2, Valid: true},
|
||
})
|
||
// Mark preset A as validation_failed from the start.
|
||
err := db.UpdatePresetPrebuildStatus(ctx, database.UpdatePresetPrebuildStatusParams{
|
||
PresetID: presetA.ID,
|
||
Status: database.PrebuildStatusValidationFailed,
|
||
})
|
||
require.NoError(t, err)
|
||
|
||
presetB := dbgen.Preset(t, db, database.InsertPresetParams{
|
||
TemplateVersionID: tvB.ID,
|
||
Name: "preset-will-400",
|
||
DesiredInstances: sql.NullInt32{Int32: 1, Valid: true},
|
||
})
|
||
presetC := dbgen.Preset(t, db, database.InsertPresetParams{
|
||
TemplateVersionID: tvC.ID,
|
||
Name: "preset-multi-create",
|
||
DesiredInstances: sql.NullInt32{Int32: 3, Valid: true},
|
||
})
|
||
presetD := dbgen.Preset(t, db, database.InsertPresetParams{
|
||
TemplateVersionID: tvD.ID,
|
||
Name: "preset-will-500",
|
||
DesiredInstances: sql.NullInt32{Int32: 1, Valid: true},
|
||
})
|
||
presetE := dbgen.Preset(t, db, database.InsertPresetParams{
|
||
TemplateVersionID: tvE.ID,
|
||
Name: "preset-control",
|
||
DesiredInstances: sql.NullInt32{Int32: 1, Valid: true},
|
||
})
|
||
|
||
// Wrap DB to inject 500 error for template D's version.
|
||
wrappedDB := prebuildStoreWrapper{
|
||
Store: db,
|
||
errorOnTemplateVersionID: tvD.ID,
|
||
}
|
||
|
||
controller := prebuilds.NewStoreReconciler(
|
||
wrappedDB, pubSub, cache, cfg, logger,
|
||
clock,
|
||
registry,
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// First reconcile: marks B, C as validation_failed.
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// Second reconcile: updates metrics with newly-failed presets
|
||
// (metrics are updated based on snapshot taken at the START of ReconcileAll).
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// Verify preset states.
|
||
verifyPreset := func(presetID uuid.UUID, expectedStatus database.PrebuildStatus, templateID uuid.UUID, expectWorkspaces int) {
|
||
preset, err := db.GetPresetByID(ctx, presetID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, expectedStatus, preset.PrebuildStatus,
|
||
"preset %s should have status %s", preset.Name, expectedStatus)
|
||
|
||
workspaces, err := db.GetWorkspacesByTemplateID(ctx, templateID)
|
||
require.NoError(t, err)
|
||
require.Len(t, workspaces, expectWorkspaces,
|
||
"template %s should have %d workspaces", templateID, expectWorkspaces)
|
||
}
|
||
|
||
// Preset A: already validation_failed, stays that way, no workspaces.
|
||
verifyPreset(presetA.ID, database.PrebuildStatusValidationFailed, tplA.ID, 0)
|
||
// Preset B: healthy -> validation_failed due to 400 (missing required param).
|
||
verifyPreset(presetB.ID, database.PrebuildStatusValidationFailed, tplB.ID, 0)
|
||
// Preset C: healthy -> validation_failed due to 400 (missing required param), even with 3 desired instances.
|
||
verifyPreset(presetC.ID, database.PrebuildStatusValidationFailed, tplC.ID, 0)
|
||
// Preset D: stays healthy because 500 error does not mark as validation_failed.
|
||
verifyPreset(presetD.ID, database.PrebuildStatusHealthy, tplD.ID, 0)
|
||
// Preset E: stays healthy (control)
|
||
verifyPreset(presetE.ID, database.PrebuildStatusHealthy, tplE.ID, 1)
|
||
|
||
// Verify metrics: A, B, C should have validation_failed metric set to 1.
|
||
require.NoError(t, controller.ForceMetricsUpdate(ctx))
|
||
mf, err := registry.Gather()
|
||
require.NoError(t, err)
|
||
|
||
// Helper to check metric value.
|
||
checkMetric := func(templateName, presetName string, expectSet bool) {
|
||
metric := findMetric(mf, prebuilds.MetricPresetValidationFailedGauge, map[string]string{
|
||
"template_name": templateName,
|
||
"preset_name": presetName,
|
||
"organization_name": org.Name,
|
||
})
|
||
if expectSet {
|
||
require.NotNil(t, metric, "metric should be set for preset %s", presetName)
|
||
require.NotNil(t, metric.GetGauge())
|
||
require.EqualValues(t, 1, metric.GetGauge().GetValue(),
|
||
"metric value should be 1 for preset %s", presetName)
|
||
} else {
|
||
require.Nil(t, metric, "metric should not be set for preset %s", presetName)
|
||
}
|
||
}
|
||
|
||
checkMetric(tplA.Name, presetA.Name, true)
|
||
checkMetric(tplB.Name, presetB.Name, true)
|
||
checkMetric(tplC.Name, presetC.Name, true)
|
||
checkMetric(tplD.Name, presetD.Name, false)
|
||
checkMetric(tplE.Name, presetE.Name, false)
|
||
}
|
||
|
||
// TestValidationFailedPresetResets verifies that when a preset is marked as
|
||
// validation_failed and a new template version is promoted, the new preset
|
||
// starts healthy and the validation_failed metric is cleared.
|
||
func TestValidationFailedPresetResets(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
clock := quartz.NewMock(t)
|
||
ctx := testutil.Context(t, testutil.WaitMedium)
|
||
cfg := codersdk.PrebuildsConfig{}
|
||
logger := slogtest.Make(
|
||
t, &slogtest.Options{IgnoreErrors: true},
|
||
).Leveled(slog.LevelDebug)
|
||
db, pubSub := dbtestutil.NewDB(t)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
registry := prometheus.NewRegistry()
|
||
|
||
ownerID := uuid.New()
|
||
dbgen.User(t, db, database.User{
|
||
ID: ownerID,
|
||
})
|
||
org := dbgen.Organization(t, db, database.Organization{})
|
||
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{
|
||
OrganizationID: org.ID,
|
||
UserID: ownerID,
|
||
})
|
||
|
||
// Create a template with a required param that will cause validation failure.
|
||
tpl := dbgen.Template(t, db, database.Template{
|
||
OrganizationID: org.ID,
|
||
CreatedBy: ownerID,
|
||
Name: "tpl-version-reset",
|
||
})
|
||
|
||
job1 := dbgen.ProvisionerJob(t, db, pubSub, database.ProvisionerJob{
|
||
OrganizationID: org.ID,
|
||
CompletedAt: sql.NullTime{Time: clock.Now().Add(earlier), Valid: true},
|
||
InitiatorID: ownerID,
|
||
})
|
||
tv1 := dbgen.TemplateVersion(t, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true},
|
||
OrganizationID: org.ID,
|
||
JobID: job1.ID,
|
||
CreatedBy: ownerID,
|
||
})
|
||
require.NoError(t, db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
|
||
ID: tpl.ID,
|
||
ActiveVersionID: tv1.ID,
|
||
}))
|
||
|
||
// Add a required param with no default, this triggers validation failure.
|
||
dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
|
||
TemplateVersionID: tv1.ID,
|
||
Name: "required_param",
|
||
Description: "required param to trigger validation failure",
|
||
Type: "bool",
|
||
DefaultValue: "",
|
||
Required: true,
|
||
})
|
||
|
||
preset1 := dbgen.Preset(t, db, database.InsertPresetParams{
|
||
TemplateVersionID: tv1.ID,
|
||
Name: "preset-test",
|
||
DesiredInstances: sql.NullInt32{Int32: 1, Valid: true},
|
||
})
|
||
|
||
reconciler := prebuilds.NewStoreReconciler(
|
||
db, pubSub, cache, cfg, logger,
|
||
clock,
|
||
registry,
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// First reconcile: preset gets marked as validation_failed.
|
||
_, err := reconciler.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// Verify preset is marked as validation_failed in the database.
|
||
updatedPreset, err := db.GetPresetByID(ctx, preset1.ID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, database.PrebuildStatusValidationFailed, updatedPreset.PrebuildStatus)
|
||
|
||
// Second reconcile: metrics snapshot picks up the validation_failed status.
|
||
_, err = reconciler.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// Verify metric is set.
|
||
require.NoError(t, reconciler.ForceMetricsUpdate(ctx))
|
||
mf, err := registry.Gather()
|
||
require.NoError(t, err)
|
||
metric := findMetric(mf, prebuilds.MetricPresetValidationFailedGauge, map[string]string{
|
||
"template_name": tpl.Name,
|
||
"preset_name": preset1.Name,
|
||
"organization_name": org.Name,
|
||
})
|
||
require.NotNil(t, metric)
|
||
require.EqualValues(t, 1, metric.GetGauge().GetValue())
|
||
|
||
// Promote a new template version without the problematic param.
|
||
job2 := dbgen.ProvisionerJob(t, db, pubSub, database.ProvisionerJob{
|
||
OrganizationID: org.ID,
|
||
CompletedAt: sql.NullTime{Time: clock.Now().Add(earlier), Valid: true},
|
||
InitiatorID: ownerID,
|
||
})
|
||
tv2 := dbgen.TemplateVersion(t, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true},
|
||
OrganizationID: org.ID,
|
||
JobID: job2.ID,
|
||
CreatedBy: ownerID,
|
||
})
|
||
require.NoError(t, db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
|
||
ID: tpl.ID,
|
||
ActiveVersionID: tv2.ID,
|
||
}))
|
||
|
||
// Create a preset on the new version.
|
||
preset2 := dbgen.Preset(t, db, database.InsertPresetParams{
|
||
TemplateVersionID: tv2.ID,
|
||
Name: "preset-test", // same name, new version
|
||
DesiredInstances: sql.NullInt32{Int32: 1, Valid: true},
|
||
})
|
||
|
||
// Reconcile with the new version active.
|
||
_, err = reconciler.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// Old preset stays validation_failed (it's now inactive, won't be reset).
|
||
oldPreset, err := db.GetPresetByID(ctx, preset1.ID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, database.PrebuildStatusValidationFailed, oldPreset.PrebuildStatus)
|
||
|
||
// New preset is healthy.
|
||
newPreset, err := db.GetPresetByID(ctx, preset2.ID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, database.PrebuildStatusHealthy, newPreset.PrebuildStatus)
|
||
|
||
// Metric should be cleared: the old preset is inactive, so it's no longer reported.
|
||
require.NoError(t, reconciler.ForceMetricsUpdate(ctx))
|
||
mf, err = registry.Gather()
|
||
require.NoError(t, err)
|
||
metric = findMetric(mf, prebuilds.MetricPresetValidationFailedGauge, map[string]string{
|
||
"template_name": tpl.Name,
|
||
"preset_name": preset1.Name,
|
||
"organization_name": org.Name,
|
||
})
|
||
require.Nil(t, metric)
|
||
|
||
// New preset should have a workspace created.
|
||
workspaces, err := db.GetWorkspacesByTemplateID(ctx, tpl.ID)
|
||
require.NoError(t, err)
|
||
require.Len(t, workspaces, 1)
|
||
}
|
||
|
||
func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
// Test cases verify the behavior of prebuild creation depending on configured failure limits.
|
||
testCases := []struct {
|
||
name string
|
||
hardLimit int64
|
||
createNewTemplateVersion bool
|
||
deleteTemplate bool
|
||
}{
|
||
{
|
||
// hard limit is hit - but we allow deletion of prebuilt workspace because it's outdated (new template version was created)
|
||
name: "new template version is created",
|
||
hardLimit: 1,
|
||
createNewTemplateVersion: true,
|
||
deleteTemplate: false,
|
||
},
|
||
{
|
||
// hard limit is hit - but we allow deletion of prebuilt workspace because template is deleted
|
||
name: "template is deleted",
|
||
hardLimit: 1,
|
||
createNewTemplateVersion: false,
|
||
deleteTemplate: true,
|
||
},
|
||
}
|
||
|
||
for _, tc := range testCases {
|
||
t.Run(tc.name, func(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
clock := quartz.NewMock(t)
|
||
ctx := testutil.Context(t, testutil.WaitShort)
|
||
cfg := codersdk.PrebuildsConfig{
|
||
FailureHardLimit: serpent.Int64(tc.hardLimit),
|
||
ReconciliationBackoffInterval: 0,
|
||
}
|
||
logger := slogtest.Make(
|
||
t, &slogtest.Options{IgnoreErrors: true},
|
||
).Leveled(slog.LevelDebug)
|
||
db, pubSub := dbtestutil.NewDB(t)
|
||
fakeEnqueuer := newFakeEnqueuer()
|
||
registry := prometheus.NewRegistry()
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, pubSub, cache, cfg, logger,
|
||
clock,
|
||
registry,
|
||
fakeEnqueuer,
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// Set up test environment with a template, version, and preset.
|
||
ownerID := uuid.New()
|
||
dbgen.User(t, db, database.User{
|
||
ID: ownerID,
|
||
})
|
||
org, template := setupTestDBTemplate(t, db, ownerID, false)
|
||
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
|
||
preset := setupTestDBPreset(t, db, templateVersionID, 2, uuid.New().String())
|
||
|
||
// Create a successful prebuilt workspace.
|
||
successfulWorkspace, _ := setupTestDBPrebuild(
|
||
t,
|
||
clock,
|
||
db,
|
||
pubSub,
|
||
database.WorkspaceTransitionStart,
|
||
database.ProvisionerJobStatusSucceeded,
|
||
org.ID,
|
||
preset,
|
||
template.ID,
|
||
templateVersionID,
|
||
)
|
||
|
||
// Make sure that prebuilt workspaces created in such order: [successful, failed].
|
||
clock.Advance(time.Second).MustWait(ctx)
|
||
|
||
// Create a failed prebuilt workspace that counts toward the hard failure limit.
|
||
setupTestDBPrebuild(
|
||
t,
|
||
clock,
|
||
db,
|
||
pubSub,
|
||
database.WorkspaceTransitionStart,
|
||
database.ProvisionerJobStatusFailed,
|
||
org.ID,
|
||
preset,
|
||
template.ID,
|
||
templateVersionID,
|
||
)
|
||
|
||
getJobStatusMap := func(workspaces []database.WorkspaceTable) map[database.ProvisionerJobStatus]int {
|
||
jobStatusMap := make(map[database.ProvisionerJobStatus]int)
|
||
for _, workspace := range workspaces {
|
||
workspaceBuilds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
|
||
WorkspaceID: workspace.ID,
|
||
})
|
||
require.NoError(t, err)
|
||
|
||
for _, workspaceBuild := range workspaceBuilds {
|
||
job, err := db.GetProvisionerJobByID(ctx, workspaceBuild.JobID)
|
||
require.NoError(t, err)
|
||
jobStatusMap[job.JobStatus]++
|
||
}
|
||
}
|
||
return jobStatusMap
|
||
}
|
||
|
||
// Verify initial state: two workspaces exist, one successful, one failed.
|
||
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, 2, len(workspaces))
|
||
jobStatusMap := getJobStatusMap(workspaces)
|
||
require.Len(t, jobStatusMap, 2)
|
||
require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusSucceeded])
|
||
require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusFailed])
|
||
|
||
// Verify initial state: metric is not set - meaning preset is not hard limited.
|
||
require.NoError(t, controller.ForceMetricsUpdate(ctx))
|
||
mf, err := registry.Gather()
|
||
require.NoError(t, err)
|
||
metric := findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{
|
||
"template_name": template.Name,
|
||
"preset_name": preset.Name,
|
||
"organization_name": org.Name,
|
||
})
|
||
require.Nil(t, metric)
|
||
|
||
// We simulate a failed prebuild in the test; Consequently, the backoff mechanism is triggered when ReconcileAll is called.
|
||
// Even though ReconciliationBackoffInterval is set to zero, we still need to advance the clock by at least one nanosecond.
|
||
clock.Advance(time.Nanosecond).MustWait(ctx)
|
||
|
||
// Trigger reconciliation to attempt creating a new prebuild.
|
||
// The outcome depends on whether the hard limit has been reached.
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// These two additional calls to ReconcileAll should not trigger any notifications.
|
||
// A notification is only sent once.
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// Verify the final state after reconciliation.
|
||
// When hard limit is reached, no new workspace should be created.
|
||
workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, 2, len(workspaces))
|
||
jobStatusMap = getJobStatusMap(workspaces)
|
||
require.Len(t, jobStatusMap, 2)
|
||
require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusSucceeded])
|
||
require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusFailed])
|
||
|
||
updatedPreset, err := db.GetPresetByID(ctx, preset.ID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)
|
||
|
||
// When hard limit is reached, metric is set to 1.
|
||
mf, err = registry.Gather()
|
||
require.NoError(t, err)
|
||
metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{
|
||
"template_name": template.Name,
|
||
"preset_name": preset.Name,
|
||
"organization_name": org.Name,
|
||
})
|
||
require.NotNil(t, metric)
|
||
require.NotNil(t, metric.GetGauge())
|
||
require.EqualValues(t, 1, metric.GetGauge().GetValue())
|
||
|
||
if tc.createNewTemplateVersion {
|
||
// Create a new template version and mark it as active
|
||
// This marks the template version that we care about as inactive
|
||
setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
|
||
}
|
||
|
||
if tc.deleteTemplate {
|
||
require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
|
||
ID: template.ID,
|
||
Deleted: true,
|
||
UpdatedAt: dbtime.Now(),
|
||
}))
|
||
}
|
||
|
||
// Trigger reconciliation to make sure that successful, but outdated prebuilt workspace will be deleted.
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, 2, len(workspaces))
|
||
|
||
jobStatusMap = getJobStatusMap(workspaces)
|
||
require.Len(t, jobStatusMap, 3)
|
||
require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusSucceeded])
|
||
require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusFailed])
|
||
// Pending job should be the job that deletes successful, but outdated prebuilt workspace.
|
||
// Prebuilt workspace MUST be deleted, despite the fact that preset is marked as hard limited.
|
||
require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusPending])
|
||
|
||
workspaceBuilds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
|
||
WorkspaceID: successfulWorkspace.ID,
|
||
})
|
||
require.NoError(t, err)
|
||
require.Equal(t, 2, len(workspaceBuilds))
|
||
// Make sure that successfully created, but outdated prebuilt workspace was scheduled for deletion.
|
||
require.Equal(t, database.WorkspaceTransitionDelete, workspaceBuilds[0].Transition)
|
||
require.Equal(t, database.WorkspaceTransitionStart, workspaceBuilds[1].Transition)
|
||
|
||
// Metric is deleted after preset became outdated.
|
||
mf, err = registry.Gather()
|
||
require.NoError(t, err)
|
||
metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{
|
||
"template_name": template.Name,
|
||
"preset_name": preset.Name,
|
||
"organization_name": org.Name,
|
||
})
|
||
require.Nil(t, metric)
|
||
})
|
||
}
|
||
}
|
||
|
||
func TestRunLoop(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
prebuildLatestTransition := database.WorkspaceTransitionStart
|
||
prebuildJobStatus := database.ProvisionerJobStatusRunning
|
||
templateDeleted := false
|
||
|
||
clock := quartz.NewMock(t)
|
||
ctx := testutil.Context(t, testutil.WaitShort)
|
||
backoffInterval := time.Minute
|
||
cfg := codersdk.PrebuildsConfig{
|
||
// Given: explicitly defined backoff configuration to validate timings.
|
||
ReconciliationBackoffLookback: serpent.Duration(muchEarlier * -10), // Has to be positive.
|
||
ReconciliationBackoffInterval: serpent.Duration(backoffInterval),
|
||
ReconciliationInterval: serpent.Duration(time.Second),
|
||
}
|
||
// Do not ignore errors as we want a graceful stop
|
||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
|
||
db, pubSub := dbtestutil.NewDB(t)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
reconciler := prebuilds.NewStoreReconciler(
|
||
db, pubSub, cache, cfg, logger,
|
||
clock,
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
ownerID := uuid.New()
|
||
dbgen.User(t, db, database.User{
|
||
ID: ownerID,
|
||
})
|
||
org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
|
||
templateVersionID := setupTestDBTemplateVersion(
|
||
ctx,
|
||
t,
|
||
clock,
|
||
db,
|
||
pubSub,
|
||
org.ID,
|
||
ownerID,
|
||
template.ID,
|
||
)
|
||
preset := setupTestDBPreset(
|
||
t,
|
||
db,
|
||
templateVersionID,
|
||
4,
|
||
uuid.New().String(),
|
||
)
|
||
preset2 := setupTestDBPreset(
|
||
t,
|
||
db,
|
||
templateVersionID,
|
||
10,
|
||
uuid.New().String(),
|
||
)
|
||
prebuildIDs := make([]uuid.UUID, 0)
|
||
for i := 0; i < int(preset.DesiredInstances.Int32); i++ {
|
||
prebuild, _ := setupTestDBPrebuild(
|
||
t,
|
||
clock,
|
||
db,
|
||
pubSub,
|
||
prebuildLatestTransition,
|
||
prebuildJobStatus,
|
||
org.ID,
|
||
preset,
|
||
template.ID,
|
||
templateVersionID,
|
||
)
|
||
prebuildIDs = append(prebuildIDs, prebuild.ID)
|
||
}
|
||
getNewPrebuildCount := func() int32 {
|
||
newPrebuildCount := 0
|
||
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
for _, workspace := range workspaces {
|
||
if slice.Contains(prebuildIDs, workspace.ID) {
|
||
continue
|
||
}
|
||
newPrebuildCount++
|
||
}
|
||
|
||
return int32(newPrebuildCount) // nolint:gosec
|
||
}
|
||
|
||
// we need to wait until ticker is initialized, and only then use clock.Advance()
|
||
// otherwise clock.Advance() will be ignored
|
||
trap := clock.Trap().NewTicker()
|
||
go reconciler.Run(ctx)
|
||
// wait until ticker is initialized
|
||
trap.MustWait(ctx).MustRelease(ctx)
|
||
// start 1st iteration of ReconciliationLoop
|
||
// NOTE: at this point MustWait waits that iteration is started (ReconcileAll is called), but it doesn't wait until it completes
|
||
clock.Advance(cfg.ReconciliationInterval.Value()).MustWait(ctx)
|
||
|
||
// wait until ReconcileAll is completed
|
||
// TODO: is it possible to avoid Eventually and replace it with quartz?
|
||
// Ideally to have all control on test-level, and be able to advance loop iterations from the test.
|
||
require.Eventually(t, func() bool {
|
||
newPrebuildCount := getNewPrebuildCount()
|
||
|
||
// NOTE: preset1 doesn't block creation of instances in preset2
|
||
return preset2.DesiredInstances.Int32 == newPrebuildCount
|
||
}, testutil.WaitShort, testutil.IntervalFast)
|
||
|
||
// setup one more preset with 5 prebuilds
|
||
preset3 := setupTestDBPreset(
|
||
t,
|
||
db,
|
||
templateVersionID,
|
||
5,
|
||
uuid.New().String(),
|
||
)
|
||
newPrebuildCount := getNewPrebuildCount()
|
||
// nothing changed, because we didn't trigger a new iteration of a loop
|
||
require.Equal(t, preset2.DesiredInstances.Int32, newPrebuildCount)
|
||
|
||
// start 2nd iteration of ReconciliationLoop
|
||
// NOTE: at this point MustWait waits that iteration is started (ReconcileAll is called), but it doesn't wait until it completes
|
||
clock.Advance(cfg.ReconciliationInterval.Value()).MustWait(ctx)
|
||
|
||
// wait until ReconcileAll is completed
|
||
require.Eventually(t, func() bool {
|
||
newPrebuildCount := getNewPrebuildCount()
|
||
|
||
// both prebuilds for preset2 and preset3 were created
|
||
return preset2.DesiredInstances.Int32+preset3.DesiredInstances.Int32 == newPrebuildCount
|
||
}, testutil.WaitShort, testutil.IntervalFast)
|
||
|
||
// gracefully stop the reconciliation loop
|
||
reconciler.Stop(ctx, nil)
|
||
}
|
||
|
||
// TestReconcilerLifecycle tests that a StoreReconciler can be stopped and a new one
|
||
// created to simulate the prebuilds feature being disabled and re-enabled.
|
||
func TestReconcilerLifecycle(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
ctx := testutil.Context(t, testutil.WaitLong)
|
||
logger := testutil.Logger(t)
|
||
db, ps := dbtestutil.NewDB(t)
|
||
cfg := codersdk.PrebuildsConfig{
|
||
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
|
||
}
|
||
registry := prometheus.NewRegistry()
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
|
||
// Given: a running reconciler (simulating the prebuilds feature being enabled)
|
||
reconciler := prebuilds.NewStoreReconciler(
|
||
db, ps, cache, cfg, logger,
|
||
quartz.NewMock(t),
|
||
registry,
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// When: the reconciler is stopped (simulating the prebuilds feature being disabled)
|
||
reconciler.Stop(ctx, xerrors.New("entitlements change"))
|
||
|
||
// Then: a new reconciler can be created without error
|
||
// (simulating the prebuilds feature being re-enabled)
|
||
reconciler = prebuilds.NewStoreReconciler(
|
||
db, ps, cache, cfg, logger,
|
||
quartz.NewMock(t),
|
||
registry,
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// Gracefully stop the reconciliation loop
|
||
reconciler.Stop(ctx, nil)
|
||
}
|
||
|
||
func TestFailedBuildBackoff(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
ctx := testutil.Context(t, testutil.WaitSuperLong)
|
||
|
||
// Setup.
|
||
clock := quartz.NewMock(t)
|
||
backoffInterval := time.Minute
|
||
cfg := codersdk.PrebuildsConfig{
|
||
// Given: explicitly defined backoff configuration to validate timings.
|
||
ReconciliationBackoffLookback: serpent.Duration(muchEarlier * -10), // Has to be positive.
|
||
ReconciliationBackoffInterval: serpent.Duration(backoffInterval),
|
||
ReconciliationInterval: serpent.Duration(time.Second),
|
||
}
|
||
logger := slogtest.Make(
|
||
t, &slogtest.Options{IgnoreErrors: true},
|
||
).Leveled(slog.LevelDebug)
|
||
db, ps := dbtestutil.NewDB(t)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
reconciler := prebuilds.NewStoreReconciler(
|
||
db, ps, cache, cfg, logger,
|
||
clock,
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// Given: an active template version with presets and prebuilds configured.
|
||
const desiredInstances = 2
|
||
userID := uuid.New()
|
||
dbgen.User(t, db, database.User{
|
||
ID: userID,
|
||
})
|
||
org, template := setupTestDBTemplate(t, db, userID, false)
|
||
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, ps, org.ID, userID, template.ID)
|
||
|
||
preset := setupTestDBPreset(t, db, templateVersionID, desiredInstances, "test")
|
||
for range desiredInstances {
|
||
_, _ = setupTestDBPrebuild(t, clock, db, ps, database.WorkspaceTransitionStart, database.ProvisionerJobStatusFailed, org.ID, preset, template.ID, templateVersionID)
|
||
}
|
||
|
||
// When: determining what actions to take next, backoff is calculated because the prebuild is in a failed state.
|
||
snapshot, err := reconciler.SnapshotState(ctx, db)
|
||
require.NoError(t, err)
|
||
require.Len(t, snapshot.Presets, 1)
|
||
presetState, err := snapshot.FilterByPreset(preset.ID)
|
||
require.NoError(t, err)
|
||
state := presetState.CalculateState()
|
||
actions, err := reconciler.CalculateActions(ctx, *presetState)
|
||
require.NoError(t, err)
|
||
require.Equal(t, 1, len(actions))
|
||
|
||
// Then: the backoff time is in the future, no prebuilds are running, and we won't create any new prebuilds.
|
||
require.EqualValues(t, 0, state.Actual)
|
||
require.EqualValues(t, 0, actions[0].Create)
|
||
require.EqualValues(t, desiredInstances, state.Desired)
|
||
require.True(t, clock.Now().Before(actions[0].BackoffUntil))
|
||
|
||
// Then: the backoff time is as expected based on the number of failed builds.
|
||
require.NotNil(t, presetState.Backoff)
|
||
require.EqualValues(t, desiredInstances, presetState.Backoff.NumFailed)
|
||
require.EqualValues(t, backoffInterval*time.Duration(presetState.Backoff.NumFailed), clock.Until(actions[0].BackoffUntil).Truncate(backoffInterval))
|
||
|
||
// When: advancing to the next tick which is still within the backoff time.
|
||
clock.Advance(cfg.ReconciliationInterval.Value())
|
||
|
||
// Then: the backoff interval will not have changed.
|
||
snapshot, err = reconciler.SnapshotState(ctx, db)
|
||
require.NoError(t, err)
|
||
presetState, err = snapshot.FilterByPreset(preset.ID)
|
||
require.NoError(t, err)
|
||
newState := presetState.CalculateState()
|
||
newActions, err := reconciler.CalculateActions(ctx, *presetState)
|
||
require.NoError(t, err)
|
||
require.Equal(t, 1, len(newActions))
|
||
|
||
require.EqualValues(t, 0, newState.Actual)
|
||
require.EqualValues(t, 0, newActions[0].Create)
|
||
require.EqualValues(t, desiredInstances, newState.Desired)
|
||
require.EqualValues(t, actions[0].BackoffUntil, newActions[0].BackoffUntil)
|
||
|
||
// When: advancing beyond the backoff time.
|
||
clock.Advance(clock.Until(actions[0].BackoffUntil.Add(time.Second)))
|
||
|
||
// Then: we will attempt to create a new prebuild.
|
||
snapshot, err = reconciler.SnapshotState(ctx, db)
|
||
require.NoError(t, err)
|
||
presetState, err = snapshot.FilterByPreset(preset.ID)
|
||
require.NoError(t, err)
|
||
state = presetState.CalculateState()
|
||
actions, err = reconciler.CalculateActions(ctx, *presetState)
|
||
require.NoError(t, err)
|
||
require.Equal(t, 1, len(actions))
|
||
|
||
require.EqualValues(t, 0, state.Actual)
|
||
require.EqualValues(t, desiredInstances, state.Desired)
|
||
require.EqualValues(t, desiredInstances, actions[0].Create)
|
||
|
||
// When: the desired number of new prebuild are provisioned, but one fails again.
|
||
for i := 0; i < desiredInstances; i++ {
|
||
status := database.ProvisionerJobStatusFailed
|
||
if i == 1 {
|
||
status = database.ProvisionerJobStatusSucceeded
|
||
}
|
||
_, _ = setupTestDBPrebuild(t, clock, db, ps, database.WorkspaceTransitionStart, status, org.ID, preset, template.ID, templateVersionID)
|
||
}
|
||
|
||
// Then: the backoff time is roughly equal to two backoff intervals, since another build has failed.
|
||
snapshot, err = reconciler.SnapshotState(ctx, db)
|
||
require.NoError(t, err)
|
||
presetState, err = snapshot.FilterByPreset(preset.ID)
|
||
require.NoError(t, err)
|
||
state = presetState.CalculateState()
|
||
actions, err = reconciler.CalculateActions(ctx, *presetState)
|
||
require.NoError(t, err)
|
||
require.Equal(t, 1, len(actions))
|
||
|
||
require.EqualValues(t, 1, state.Actual)
|
||
require.EqualValues(t, desiredInstances, state.Desired)
|
||
require.EqualValues(t, 0, actions[0].Create)
|
||
require.EqualValues(t, 3, presetState.Backoff.NumFailed)
|
||
require.EqualValues(t, backoffInterval*time.Duration(presetState.Backoff.NumFailed), clock.Until(actions[0].BackoffUntil).Truncate(backoffInterval))
|
||
}
|
||
|
||
func TestReconciliationLock(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
ctx := testutil.Context(t, testutil.WaitSuperLong)
|
||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
|
||
db, ps := dbtestutil.NewDB(t)
|
||
|
||
wg := sync.WaitGroup{}
|
||
mutex := sync.Mutex{}
|
||
for i := 0; i < 5; i++ {
|
||
wg.Add(1)
|
||
go func() {
|
||
defer wg.Done()
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
reconciler := prebuilds.NewStoreReconciler(
|
||
db,
|
||
ps,
|
||
cache,
|
||
codersdk.PrebuildsConfig{},
|
||
slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug),
|
||
quartz.NewMock(t),
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(), noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
reconciler.WithReconciliationLock(ctx, logger, func(_ context.Context, _ database.Store) error {
|
||
lockObtained := mutex.TryLock()
|
||
// As long as the postgres lock is held, this mutex should always be unlocked when we get here.
|
||
// If this mutex is ever locked at this point, then that means that the postgres lock is not being held while we're
|
||
// inside WithReconciliationLock, which is meant to hold the lock.
|
||
require.True(t, lockObtained)
|
||
// Sleep a bit to give reconcilers more time to contend for the lock
|
||
time.Sleep(time.Second)
|
||
defer mutex.Unlock()
|
||
return nil
|
||
})
|
||
}()
|
||
}
|
||
wg.Wait()
|
||
}
|
||
|
||
func TestTrackResourceReplacement(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
ctx := testutil.Context(t, testutil.WaitSuperLong)
|
||
|
||
// Setup.
|
||
clock := quartz.NewMock(t)
|
||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
|
||
db, ps := dbtestutil.NewDB(t)
|
||
|
||
fakeEnqueuer := newFakeEnqueuer()
|
||
registry := prometheus.NewRegistry()
|
||
cache := files.New(registry, &coderdtest.FakeAuthorizer{})
|
||
reconciler := prebuilds.NewStoreReconciler(
|
||
db, ps, cache, codersdk.PrebuildsConfig{}, logger,
|
||
clock,
|
||
registry,
|
||
fakeEnqueuer,
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// Given: a template admin to receive a notification.
|
||
templateAdmin := dbgen.User(t, db, database.User{
|
||
RBACRoles: []string{codersdk.RoleTemplateAdmin},
|
||
})
|
||
|
||
// Given: a prebuilt workspace.
|
||
userID := uuid.New()
|
||
dbgen.User(t, db, database.User{ID: userID})
|
||
org, template := setupTestDBTemplate(t, db, userID, false)
|
||
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, ps, org.ID, userID, template.ID)
|
||
preset := setupTestDBPreset(t, db, templateVersionID, 1, "b0rked")
|
||
prebuiltWorkspace, prebuild := setupTestDBPrebuild(t, clock, db, ps, database.WorkspaceTransitionStart, database.ProvisionerJobStatusSucceeded, org.ID, preset, template.ID, templateVersionID)
|
||
|
||
// Given: no replacement has been tracked yet, we should not see a metric for it yet.
|
||
require.NoError(t, reconciler.ForceMetricsUpdate(ctx))
|
||
mf, err := registry.Gather()
|
||
require.NoError(t, err)
|
||
require.Nil(t, findMetric(mf, prebuilds.MetricResourceReplacementsCount, map[string]string{
|
||
"template_name": template.Name,
|
||
"preset_name": preset.Name,
|
||
"organization_name": org.Name,
|
||
}))
|
||
|
||
// When: a claim occurred and resource replacements are detected (_how_ is out of scope of this test).
|
||
reconciler.TrackResourceReplacement(ctx, prebuiltWorkspace.ID, prebuild.ID, []*sdkproto.ResourceReplacement{
|
||
{
|
||
Resource: "docker_container[0]",
|
||
Paths: []string{"env", "image"},
|
||
},
|
||
{
|
||
Resource: "docker_volume[0]",
|
||
Paths: []string{"name"},
|
||
},
|
||
})
|
||
|
||
// Then: a notification will be sent detailing the replacement(s).
|
||
matching := fakeEnqueuer.Sent(func(notification *notificationstest.FakeNotification) bool {
|
||
// This is not an exhaustive check of the expected labels/data in the notification. This would tie the implementations
|
||
// too tightly together.
|
||
// All we need to validate is that a template of the right kind was sent, to the expected user, with some replacements.
|
||
|
||
if !assert.Equal(t, notification.TemplateID, notifications.TemplateWorkspaceResourceReplaced, "unexpected template") {
|
||
return false
|
||
}
|
||
|
||
if !assert.Equal(t, templateAdmin.ID, notification.UserID, "unexpected receiver") {
|
||
return false
|
||
}
|
||
|
||
if !assert.Len(t, notification.Data["replacements"], 2, "unexpected replacements count") {
|
||
return false
|
||
}
|
||
|
||
return true
|
||
})
|
||
require.Len(t, matching, 1)
|
||
|
||
// Then: the metric will be incremented.
|
||
mf, err = registry.Gather()
|
||
require.NoError(t, err)
|
||
metric := findMetric(mf, prebuilds.MetricResourceReplacementsCount, map[string]string{
|
||
"template_name": template.Name,
|
||
"preset_name": preset.Name,
|
||
"organization_name": org.Name,
|
||
})
|
||
require.NotNil(t, metric)
|
||
require.NotNil(t, metric.GetCounter())
|
||
require.EqualValues(t, 1, metric.GetCounter().GetValue())
|
||
}
|
||
|
||
func TestExpiredPrebuildsMultipleActions(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
testCases := []struct {
|
||
name string
|
||
running int
|
||
desired int32
|
||
expired int
|
||
extraneous int
|
||
created int
|
||
}{
|
||
// With 2 running prebuilds, none of which are expired, and the desired count is met,
|
||
// no deletions or creations should occur.
|
||
{
|
||
name: "no expired prebuilds - no actions taken",
|
||
running: 2,
|
||
desired: 2,
|
||
expired: 0,
|
||
extraneous: 0,
|
||
created: 0,
|
||
},
|
||
// With 2 running prebuilds, 1 of which is expired, the expired prebuild should be deleted,
|
||
// and one new prebuild should be created to maintain the desired count.
|
||
{
|
||
name: "one expired prebuild – deleted and replaced",
|
||
running: 2,
|
||
desired: 2,
|
||
expired: 1,
|
||
extraneous: 0,
|
||
created: 1,
|
||
},
|
||
// With 2 running prebuilds, both expired, both should be deleted,
|
||
// and 2 new prebuilds created to match the desired count.
|
||
{
|
||
name: "all prebuilds expired – all deleted and recreated",
|
||
running: 2,
|
||
desired: 2,
|
||
expired: 2,
|
||
extraneous: 0,
|
||
created: 2,
|
||
},
|
||
// With 4 running prebuilds, 2 of which are expired, and the desired count is 2,
|
||
// the expired prebuilds should be deleted. No new creations are needed
|
||
// since removing the expired ones brings actual = desired.
|
||
{
|
||
name: "expired prebuilds deleted to reach desired count",
|
||
running: 4,
|
||
desired: 2,
|
||
expired: 2,
|
||
extraneous: 0,
|
||
created: 0,
|
||
},
|
||
// With 4 running prebuilds (1 expired), and the desired count is 2,
|
||
// the first action should delete the expired one,
|
||
// and the second action should delete one additional (non-expired) prebuild
|
||
// to eliminate the remaining excess.
|
||
{
|
||
name: "expired prebuild deleted first, then extraneous",
|
||
running: 4,
|
||
desired: 2,
|
||
expired: 1,
|
||
extraneous: 1,
|
||
created: 0,
|
||
},
|
||
}
|
||
|
||
for _, tc := range testCases {
|
||
t.Run(tc.name, func(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
clock := quartz.NewMock(t)
|
||
ctx := testutil.Context(t, testutil.WaitLong)
|
||
cfg := codersdk.PrebuildsConfig{}
|
||
logger := slogtest.Make(
|
||
t, &slogtest.Options{IgnoreErrors: true},
|
||
).Leveled(slog.LevelDebug)
|
||
db, pubSub := dbtestutil.NewDB(t)
|
||
fakeEnqueuer := newFakeEnqueuer()
|
||
registry := prometheus.NewRegistry()
|
||
cache := files.New(registry, &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, pubSub, cache, cfg, logger,
|
||
clock,
|
||
registry,
|
||
fakeEnqueuer,
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// Set up test environment with a template, version, and preset
|
||
ownerID := uuid.New()
|
||
dbgen.User(t, db, database.User{
|
||
ID: ownerID,
|
||
})
|
||
org, template := setupTestDBTemplate(t, db, ownerID, false)
|
||
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
|
||
|
||
ttlDuration := muchEarlier - time.Hour
|
||
ttl := int32(-ttlDuration.Seconds())
|
||
preset := setupTestDBPreset(t, db, templateVersionID, tc.desired, "b0rked", withTTL(ttl))
|
||
|
||
// The implementation uses time.Since(prebuild.CreatedAt) > ttl to check a prebuild expiration.
|
||
// Since our mock clock defaults to a fixed time, we must align it with the current time
|
||
// to ensure time-based logic works correctly in tests.
|
||
clock.Set(time.Now())
|
||
|
||
runningWorkspaces := make(map[string]database.WorkspaceTable)
|
||
nonExpiredWorkspaces := make([]database.WorkspaceTable, 0, tc.running-tc.expired)
|
||
expiredWorkspaces := make([]database.WorkspaceTable, 0, tc.expired)
|
||
expiredCount := 0
|
||
for r := range tc.running {
|
||
// Space out createdAt timestamps by 1 second to ensure deterministic ordering.
|
||
// This lets the test verify that the correct (oldest) extraneous prebuilds are deleted.
|
||
createdAt := muchEarlier + time.Duration(r)*time.Second
|
||
isExpired := false
|
||
if tc.expired > expiredCount {
|
||
// Set createdAt far enough in the past so that time.Since(createdAt) > TTL,
|
||
// ensuring the prebuild is treated as expired in the test.
|
||
createdAt = ttlDuration - 1*time.Minute
|
||
isExpired = true
|
||
expiredCount++
|
||
}
|
||
|
||
jobCreatedAt := clock.Now().Add(createdAt)
|
||
resp := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
|
||
OwnerID: database.PrebuildsSystemUserID,
|
||
OrganizationID: org.ID,
|
||
TemplateID: template.ID,
|
||
CreatedAt: jobCreatedAt,
|
||
}).Pubsub(pubSub).Seed(database.WorkspaceBuild{
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
TemplateVersionID: templateVersionID,
|
||
TemplateVersionPresetID: uuid.NullUUID{UUID: preset.ID, Valid: true},
|
||
Transition: database.WorkspaceTransitionStart,
|
||
}).Params(database.WorkspaceBuildParameter{
|
||
Name: "test",
|
||
Value: "test",
|
||
}).Do()
|
||
if isExpired {
|
||
expiredWorkspaces = append(expiredWorkspaces, resp.Workspace)
|
||
} else {
|
||
nonExpiredWorkspaces = append(nonExpiredWorkspaces, resp.Workspace)
|
||
}
|
||
runningWorkspaces[resp.Workspace.ID.String()] = resp.Workspace
|
||
}
|
||
|
||
getJobStatusMap := func(workspaces []database.WorkspaceTable) map[database.ProvisionerJobStatus]int {
|
||
jobStatusMap := make(map[database.ProvisionerJobStatus]int)
|
||
for _, workspace := range workspaces {
|
||
workspaceBuilds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
|
||
WorkspaceID: workspace.ID,
|
||
})
|
||
require.NoError(t, err)
|
||
|
||
for _, workspaceBuild := range workspaceBuilds {
|
||
job, err := db.GetProvisionerJobByID(ctx, workspaceBuild.JobID)
|
||
require.NoError(t, err)
|
||
jobStatusMap[job.JobStatus]++
|
||
}
|
||
}
|
||
return jobStatusMap
|
||
}
|
||
|
||
// Assert that the build associated with the given workspace has a 'start' transition status.
|
||
isWorkspaceStarted := func(workspace database.WorkspaceTable) {
|
||
workspaceBuilds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
|
||
WorkspaceID: workspace.ID,
|
||
})
|
||
require.NoError(t, err)
|
||
require.Equal(t, 1, len(workspaceBuilds))
|
||
require.Equal(t, database.WorkspaceTransitionStart, workspaceBuilds[0].Transition)
|
||
}
|
||
|
||
// Assert that the workspace build history includes a 'start' followed by a 'delete' transition status.
|
||
isWorkspaceDeleted := func(workspace database.WorkspaceTable) {
|
||
workspaceBuilds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
|
||
WorkspaceID: workspace.ID,
|
||
})
|
||
require.NoError(t, err)
|
||
require.Equal(t, 2, len(workspaceBuilds))
|
||
require.Equal(t, database.WorkspaceTransitionDelete, workspaceBuilds[0].Transition)
|
||
require.Equal(t, database.WorkspaceTransitionStart, workspaceBuilds[1].Transition)
|
||
}
|
||
|
||
// Verify that all running workspaces, whether expired or not, have successfully started.
|
||
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, tc.running, len(workspaces))
|
||
jobStatusMap := getJobStatusMap(workspaces)
|
||
require.Len(t, workspaces, tc.running)
|
||
require.Len(t, jobStatusMap, 1)
|
||
require.Equal(t, tc.running, jobStatusMap[database.ProvisionerJobStatusSucceeded])
|
||
|
||
// Assert that all running workspaces (expired and non-expired) have a 'start' transition state.
|
||
for _, workspace := range runningWorkspaces {
|
||
isWorkspaceStarted(workspace)
|
||
}
|
||
|
||
// Trigger reconciliation to process expired prebuilds and enforce desired state.
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// Sort non-expired workspaces by CreatedAt in ascending order (oldest first)
|
||
sort.Slice(nonExpiredWorkspaces, func(i, j int) bool {
|
||
return nonExpiredWorkspaces[i].CreatedAt.Before(nonExpiredWorkspaces[j].CreatedAt)
|
||
})
|
||
|
||
// Verify the status of each non-expired workspace:
|
||
// - the oldest `tc.extraneous` should have been deleted (i.e., have a 'delete' transition),
|
||
// - while the remaining newer ones should still be running (i.e., have a 'start' transition).
|
||
extraneousCount := 0
|
||
for _, running := range nonExpiredWorkspaces {
|
||
if extraneousCount < tc.extraneous {
|
||
isWorkspaceDeleted(running)
|
||
extraneousCount++
|
||
} else {
|
||
isWorkspaceStarted(running)
|
||
}
|
||
}
|
||
require.Equal(t, tc.extraneous, extraneousCount)
|
||
|
||
// Verify that each expired workspace has a 'delete' transition recorded,
|
||
// confirming it was properly marked for cleanup after reconciliation.
|
||
for _, expired := range expiredWorkspaces {
|
||
isWorkspaceDeleted(expired)
|
||
}
|
||
|
||
// After handling expired prebuilds, if running < desired, new prebuilds should be created.
|
||
// Verify that the correct number of new prebuild workspaces were created and started.
|
||
allWorkspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
|
||
createdCount := 0
|
||
for _, workspace := range allWorkspaces {
|
||
if _, ok := runningWorkspaces[workspace.ID.String()]; !ok {
|
||
// Count and verify only the newly created workspaces (i.e., not part of the original running set)
|
||
isWorkspaceStarted(workspace)
|
||
createdCount++
|
||
}
|
||
}
|
||
require.Equal(t, tc.created, createdCount)
|
||
})
|
||
}
|
||
}
|
||
|
||
func TestCancelPendingPrebuilds(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
t.Run("CancelPendingPrebuilds", func(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
for _, tt := range []struct {
|
||
name string
|
||
setupBuild func(
|
||
t *testing.T,
|
||
db database.Store,
|
||
client *codersdk.Client,
|
||
orgID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
templateVersionID uuid.UUID,
|
||
presetID uuid.NullUUID,
|
||
) dbfake.WorkspaceResponse
|
||
activeTemplateVersion bool
|
||
previouslyCanceled bool
|
||
previouslyCompleted bool
|
||
shouldCancel bool
|
||
}{
|
||
// Should cancel pending prebuild-related jobs from a non-active template version
|
||
{
|
||
name: "CancelsPendingPrebuildJobNonActiveVersion",
|
||
// Given: a pending prebuild job
|
||
setupBuild: func(t *testing.T,
|
||
db database.Store,
|
||
client *codersdk.Client,
|
||
orgID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
templateVersionID uuid.UUID,
|
||
presetID uuid.NullUUID,
|
||
) dbfake.WorkspaceResponse {
|
||
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
|
||
OwnerID: database.PrebuildsSystemUserID,
|
||
OrganizationID: orgID,
|
||
TemplateID: templateID,
|
||
}).Pending().Seed(database.WorkspaceBuild{
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
TemplateVersionID: templateVersionID,
|
||
TemplateVersionPresetID: presetID,
|
||
}).Do()
|
||
},
|
||
activeTemplateVersion: false,
|
||
previouslyCanceled: false,
|
||
previouslyCompleted: false,
|
||
shouldCancel: true,
|
||
},
|
||
// Should not cancel pending prebuild-related jobs from an active template version
|
||
{
|
||
name: "DoesNotCancelPendingPrebuildJobActiveVersion",
|
||
// Given: a pending prebuild job
|
||
setupBuild: func(t *testing.T,
|
||
db database.Store,
|
||
client *codersdk.Client,
|
||
orgID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
templateVersionID uuid.UUID,
|
||
presetID uuid.NullUUID,
|
||
) dbfake.WorkspaceResponse {
|
||
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
|
||
OwnerID: database.PrebuildsSystemUserID,
|
||
OrganizationID: orgID,
|
||
TemplateID: templateID,
|
||
}).Pending().Seed(database.WorkspaceBuild{
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
TemplateVersionID: templateVersionID,
|
||
TemplateVersionPresetID: presetID,
|
||
}).Do()
|
||
},
|
||
activeTemplateVersion: true,
|
||
previouslyCanceled: false,
|
||
previouslyCompleted: false,
|
||
shouldCancel: false,
|
||
},
|
||
// Should not cancel pending prebuild-related jobs associated to a second workspace build
|
||
{
|
||
name: "DoesNotCancelPendingPrebuildJobSecondBuild",
|
||
// Given: a pending prebuild job associated to a second workspace build
|
||
setupBuild: func(t *testing.T,
|
||
db database.Store,
|
||
client *codersdk.Client,
|
||
orgID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
templateVersionID uuid.UUID,
|
||
presetID uuid.NullUUID,
|
||
) dbfake.WorkspaceResponse {
|
||
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
|
||
OwnerID: database.PrebuildsSystemUserID,
|
||
OrganizationID: orgID,
|
||
TemplateID: templateID,
|
||
}).Pending().Seed(database.WorkspaceBuild{
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
BuildNumber: int32(2),
|
||
TemplateVersionID: templateVersionID,
|
||
TemplateVersionPresetID: presetID,
|
||
}).Do()
|
||
},
|
||
activeTemplateVersion: false,
|
||
previouslyCanceled: false,
|
||
previouslyCompleted: false,
|
||
shouldCancel: false,
|
||
},
|
||
// Should not cancel pending prebuild-related jobs of a different template
|
||
{
|
||
name: "DoesNotCancelPrebuildJobDifferentTemplate",
|
||
// Given: a pending prebuild job belonging to a different template
|
||
setupBuild: func(
|
||
t *testing.T,
|
||
db database.Store,
|
||
client *codersdk.Client,
|
||
orgID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
templateVersionID uuid.UUID,
|
||
presetID uuid.NullUUID,
|
||
) dbfake.WorkspaceResponse {
|
||
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
|
||
OwnerID: database.PrebuildsSystemUserID,
|
||
OrganizationID: orgID,
|
||
TemplateID: uuid.Nil,
|
||
}).Pending().Seed(database.WorkspaceBuild{
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
TemplateVersionID: templateVersionID,
|
||
TemplateVersionPresetID: presetID,
|
||
}).Do()
|
||
},
|
||
activeTemplateVersion: false,
|
||
previouslyCanceled: false,
|
||
previouslyCompleted: false,
|
||
shouldCancel: false,
|
||
},
|
||
// Should not cancel pending user workspace build jobs
|
||
{
|
||
name: "DoesNotCancelUserWorkspaceJob",
|
||
// Given: a pending user workspace build job
|
||
setupBuild: func(
|
||
t *testing.T,
|
||
db database.Store,
|
||
client *codersdk.Client,
|
||
orgID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
templateVersionID uuid.UUID,
|
||
presetID uuid.NullUUID,
|
||
) dbfake.WorkspaceResponse {
|
||
_, member := coderdtest.CreateAnotherUser(t, client, orgID, rbac.RoleMember())
|
||
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
|
||
OwnerID: member.ID,
|
||
OrganizationID: orgID,
|
||
TemplateID: uuid.Nil,
|
||
}).Pending().Seed(database.WorkspaceBuild{
|
||
InitiatorID: member.ID,
|
||
TemplateVersionID: templateVersionID,
|
||
TemplateVersionPresetID: presetID,
|
||
}).Do()
|
||
},
|
||
activeTemplateVersion: false,
|
||
previouslyCanceled: false,
|
||
previouslyCompleted: false,
|
||
shouldCancel: false,
|
||
},
|
||
// Should not cancel pending prebuild-related jobs with a delete transition
|
||
{
|
||
name: "DoesNotCancelPrebuildJobDeleteTransition",
|
||
// Given: a pending prebuild job with a delete transition
|
||
setupBuild: func(
|
||
t *testing.T,
|
||
db database.Store,
|
||
client *codersdk.Client,
|
||
orgID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
templateVersionID uuid.UUID,
|
||
presetID uuid.NullUUID,
|
||
) dbfake.WorkspaceResponse {
|
||
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
|
||
OwnerID: database.PrebuildsSystemUserID,
|
||
OrganizationID: orgID,
|
||
TemplateID: templateID,
|
||
}).Pending().Seed(database.WorkspaceBuild{
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
Transition: database.WorkspaceTransitionDelete,
|
||
TemplateVersionID: templateVersionID,
|
||
TemplateVersionPresetID: presetID,
|
||
}).Do()
|
||
},
|
||
activeTemplateVersion: false,
|
||
previouslyCanceled: false,
|
||
previouslyCompleted: false,
|
||
shouldCancel: false,
|
||
},
|
||
// Should not cancel prebuild-related jobs already being processed by a provisioner
|
||
{
|
||
name: "DoesNotCancelRunningPrebuildJob",
|
||
// Given: a running prebuild job
|
||
setupBuild: func(
|
||
t *testing.T,
|
||
db database.Store,
|
||
client *codersdk.Client,
|
||
orgID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
templateVersionID uuid.UUID,
|
||
presetID uuid.NullUUID,
|
||
) dbfake.WorkspaceResponse {
|
||
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
|
||
OwnerID: database.PrebuildsSystemUserID,
|
||
OrganizationID: orgID,
|
||
TemplateID: templateID,
|
||
}).Starting().Seed(database.WorkspaceBuild{
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
TemplateVersionID: templateVersionID,
|
||
TemplateVersionPresetID: presetID,
|
||
}).Do()
|
||
},
|
||
activeTemplateVersion: false,
|
||
previouslyCanceled: false,
|
||
previouslyCompleted: false,
|
||
shouldCancel: false,
|
||
},
|
||
// Should not cancel already canceled prebuild-related jobs
|
||
{
|
||
name: "DoesNotCancelCanceledPrebuildJob",
|
||
// Given: a canceled prebuild job
|
||
setupBuild: func(
|
||
t *testing.T,
|
||
db database.Store,
|
||
client *codersdk.Client,
|
||
orgID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
templateVersionID uuid.UUID,
|
||
presetID uuid.NullUUID,
|
||
) dbfake.WorkspaceResponse {
|
||
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
|
||
OwnerID: database.PrebuildsSystemUserID,
|
||
OrganizationID: orgID,
|
||
TemplateID: templateID,
|
||
}).Canceled().Seed(database.WorkspaceBuild{
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
TemplateVersionID: templateVersionID,
|
||
TemplateVersionPresetID: presetID,
|
||
}).Do()
|
||
},
|
||
activeTemplateVersion: false,
|
||
shouldCancel: false,
|
||
previouslyCanceled: true,
|
||
previouslyCompleted: true,
|
||
},
|
||
// Should not cancel completed prebuild-related jobs
|
||
{
|
||
name: "DoesNotCancelCompletedPrebuildJob",
|
||
// Given: a completed prebuild job
|
||
setupBuild: func(
|
||
t *testing.T,
|
||
db database.Store,
|
||
client *codersdk.Client,
|
||
orgID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
templateVersionID uuid.UUID,
|
||
presetID uuid.NullUUID,
|
||
) dbfake.WorkspaceResponse {
|
||
return dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
|
||
OwnerID: database.PrebuildsSystemUserID,
|
||
OrganizationID: orgID,
|
||
TemplateID: templateID,
|
||
}).Seed(database.WorkspaceBuild{
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
TemplateVersionID: templateVersionID,
|
||
TemplateVersionPresetID: presetID,
|
||
}).Do()
|
||
},
|
||
activeTemplateVersion: false,
|
||
shouldCancel: false,
|
||
previouslyCanceled: false,
|
||
previouslyCompleted: true,
|
||
},
|
||
} {
|
||
t.Run(tt.name, func(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
// Set the clock to Monday, January 1st, 2024 at 8:00 AM UTC to keep the test deterministic
|
||
clock := quartz.NewMock(t)
|
||
clock.Set(time.Date(2024, 1, 1, 8, 0, 0, 0, time.UTC))
|
||
|
||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
|
||
defer cancel()
|
||
|
||
// Setup
|
||
db, ps := dbtestutil.NewDB(t)
|
||
client, _, _ := coderdtest.NewWithAPI(t, &coderdtest.Options{
|
||
// Explicitly not including provisioner daemons, as we don't want the jobs to be processed
|
||
// Jobs operations will be simulated via the database model
|
||
IncludeProvisionerDaemon: false,
|
||
Database: db,
|
||
Pubsub: ps,
|
||
Clock: clock,
|
||
})
|
||
fakeEnqueuer := newFakeEnqueuer()
|
||
registry := prometheus.NewRegistry()
|
||
cache := files.New(registry, &coderdtest.FakeAuthorizer{})
|
||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
|
||
reconciler := prebuilds.NewStoreReconciler(
|
||
db, ps, cache, codersdk.PrebuildsConfig{}, logger,
|
||
clock,
|
||
registry,
|
||
fakeEnqueuer,
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
owner := coderdtest.CreateFirstUser(t, client)
|
||
|
||
// Given: a template with a version containing a preset with 1 prebuild instance
|
||
nonActivePresetID := uuid.NullUUID{
|
||
UUID: uuid.New(),
|
||
Valid: true,
|
||
}
|
||
nonActiveTemplateVersion := dbfake.TemplateVersion(t, db).Seed(database.TemplateVersion{
|
||
OrganizationID: owner.OrganizationID,
|
||
CreatedBy: owner.UserID,
|
||
}).Preset(database.TemplateVersionPreset{
|
||
ID: nonActivePresetID.UUID,
|
||
DesiredInstances: sql.NullInt32{
|
||
Int32: 1,
|
||
Valid: true,
|
||
},
|
||
}).Do()
|
||
templateID := nonActiveTemplateVersion.Template.ID
|
||
|
||
// Given: a new active template version
|
||
activePresetID := uuid.NullUUID{
|
||
UUID: uuid.New(),
|
||
Valid: true,
|
||
}
|
||
activeTemplateVersion := dbfake.TemplateVersion(t, db).Seed(database.TemplateVersion{
|
||
OrganizationID: owner.OrganizationID,
|
||
CreatedBy: owner.UserID,
|
||
TemplateID: uuid.NullUUID{
|
||
UUID: templateID,
|
||
Valid: true,
|
||
},
|
||
}).Preset(database.TemplateVersionPreset{
|
||
ID: activePresetID.UUID,
|
||
DesiredInstances: sql.NullInt32{
|
||
Int32: 1,
|
||
Valid: true,
|
||
},
|
||
}).SkipCreateTemplate().Do()
|
||
|
||
var pendingWorkspace dbfake.WorkspaceResponse
|
||
if tt.activeTemplateVersion {
|
||
// Given: a prebuilt workspace, workspace build and respective provisioner job from an
|
||
// active template version
|
||
pendingWorkspace = tt.setupBuild(t, db, client,
|
||
owner.OrganizationID, templateID, activeTemplateVersion.TemplateVersion.ID, activePresetID)
|
||
} else {
|
||
// Given: a prebuilt workspace, workspace build and respective provisioner job from a
|
||
// non-active template version
|
||
pendingWorkspace = tt.setupBuild(t, db, client,
|
||
owner.OrganizationID, templateID, nonActiveTemplateVersion.TemplateVersion.ID, nonActivePresetID)
|
||
}
|
||
|
||
// Given: the new template version is promoted to active
|
||
err := db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
|
||
ID: templateID,
|
||
ActiveVersionID: activeTemplateVersion.TemplateVersion.ID,
|
||
})
|
||
require.NoError(t, err)
|
||
|
||
// When: the reconciliation loop is triggered
|
||
_, err = reconciler.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
if tt.shouldCancel {
|
||
// Then: the pending prebuild job from non-active version should be canceled
|
||
cancelledJob, err := db.GetProvisionerJobByID(ctx, pendingWorkspace.Build.JobID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, clock.Now().UTC(), cancelledJob.CanceledAt.Time.UTC())
|
||
require.Equal(t, clock.Now().UTC(), cancelledJob.CompletedAt.Time.UTC())
|
||
require.Equal(t, database.ProvisionerJobStatusCanceled, cancelledJob.JobStatus)
|
||
|
||
// Then: the workspace should be deleted
|
||
deletedWorkspace, err := db.GetWorkspaceByID(ctx, pendingWorkspace.Workspace.ID)
|
||
require.NoError(t, err)
|
||
require.True(t, deletedWorkspace.Deleted)
|
||
latestBuild, err := db.GetLatestWorkspaceBuildByWorkspaceID(ctx, deletedWorkspace.ID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, database.WorkspaceTransitionDelete, latestBuild.Transition)
|
||
deleteJob, err := db.GetProvisionerJobByID(ctx, latestBuild.JobID)
|
||
require.NoError(t, err)
|
||
require.True(t, deleteJob.CompletedAt.Valid)
|
||
require.False(t, deleteJob.WorkerID.Valid)
|
||
require.Equal(t, database.ProvisionerJobStatusSucceeded, deleteJob.JobStatus)
|
||
} else {
|
||
// Then: the pending prebuild job should not be canceled
|
||
job, err := db.GetProvisionerJobByID(ctx, pendingWorkspace.Build.JobID)
|
||
require.NoError(t, err)
|
||
if !tt.previouslyCanceled {
|
||
require.Zero(t, job.CanceledAt.Time.UTC())
|
||
require.NotEqual(t, database.ProvisionerJobStatusCanceled, job.JobStatus)
|
||
}
|
||
if !tt.previouslyCompleted {
|
||
require.Zero(t, job.CompletedAt.Time.UTC())
|
||
}
|
||
|
||
// Then: the workspace should not be deleted
|
||
workspace, err := db.GetWorkspaceByID(ctx, pendingWorkspace.Workspace.ID)
|
||
require.NoError(t, err)
|
||
require.False(t, workspace.Deleted)
|
||
}
|
||
})
|
||
}
|
||
})
|
||
|
||
t.Run("CancelPendingPrebuildsMultipleTemplates", func(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
createTemplateVersionWithPreset := func(
|
||
t *testing.T,
|
||
db database.Store,
|
||
orgID uuid.UUID,
|
||
userID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
prebuiltInstances int32,
|
||
) (uuid.UUID, uuid.UUID, uuid.UUID) {
|
||
templatePreset := uuid.NullUUID{
|
||
UUID: uuid.New(),
|
||
Valid: true,
|
||
}
|
||
templateVersion := dbfake.TemplateVersion(t, db).Seed(database.TemplateVersion{
|
||
OrganizationID: orgID,
|
||
CreatedBy: userID,
|
||
TemplateID: uuid.NullUUID{
|
||
UUID: templateID,
|
||
Valid: true,
|
||
},
|
||
}).Preset(database.TemplateVersionPreset{
|
||
ID: templatePreset.UUID,
|
||
DesiredInstances: sql.NullInt32{
|
||
Int32: prebuiltInstances,
|
||
Valid: true,
|
||
},
|
||
}).Do()
|
||
|
||
return templateVersion.Template.ID, templateVersion.TemplateVersion.ID, templatePreset.UUID
|
||
}
|
||
|
||
setupPrebuilds := func(
|
||
t *testing.T,
|
||
db database.Store,
|
||
orgID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
versionID uuid.UUID,
|
||
presetID uuid.UUID,
|
||
count int,
|
||
pending bool,
|
||
) []dbfake.WorkspaceResponse {
|
||
prebuilds := make([]dbfake.WorkspaceResponse, count)
|
||
for i := range count {
|
||
builder := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
|
||
OwnerID: database.PrebuildsSystemUserID,
|
||
OrganizationID: orgID,
|
||
TemplateID: templateID,
|
||
})
|
||
|
||
if pending {
|
||
builder = builder.Pending()
|
||
}
|
||
|
||
prebuilds[i] = builder.Seed(database.WorkspaceBuild{
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
TemplateVersionID: versionID,
|
||
TemplateVersionPresetID: uuid.NullUUID{
|
||
UUID: presetID,
|
||
Valid: true,
|
||
},
|
||
}).Do()
|
||
}
|
||
|
||
return prebuilds
|
||
}
|
||
|
||
checkIfJobCanceledAndDeleted := func(
|
||
t *testing.T,
|
||
clock *quartz.Mock,
|
||
ctx context.Context,
|
||
db database.Store,
|
||
shouldBeCanceledAndDeleted bool,
|
||
prebuilds []dbfake.WorkspaceResponse,
|
||
) {
|
||
for _, prebuild := range prebuilds {
|
||
pendingJob, err := db.GetProvisionerJobByID(ctx, prebuild.Build.JobID)
|
||
require.NoError(t, err)
|
||
|
||
if shouldBeCanceledAndDeleted {
|
||
// Pending job should be canceled
|
||
require.Equal(t, database.ProvisionerJobStatusCanceled, pendingJob.JobStatus)
|
||
require.Equal(t, clock.Now().UTC(), pendingJob.CanceledAt.Time.UTC())
|
||
require.Equal(t, clock.Now().UTC(), pendingJob.CompletedAt.Time.UTC())
|
||
|
||
// Workspace should be deleted
|
||
deletedWorkspace, err := db.GetWorkspaceByID(ctx, prebuild.Workspace.ID)
|
||
require.NoError(t, err)
|
||
require.True(t, deletedWorkspace.Deleted)
|
||
latestBuild, err := db.GetLatestWorkspaceBuildByWorkspaceID(ctx, deletedWorkspace.ID)
|
||
require.NoError(t, err)
|
||
require.Equal(t, database.WorkspaceTransitionDelete, latestBuild.Transition)
|
||
deleteJob, err := db.GetProvisionerJobByID(ctx, latestBuild.JobID)
|
||
require.NoError(t, err)
|
||
require.True(t, deleteJob.CompletedAt.Valid)
|
||
require.False(t, deleteJob.WorkerID.Valid)
|
||
require.Equal(t, database.ProvisionerJobStatusSucceeded, deleteJob.JobStatus)
|
||
} else {
|
||
// Pending job should not be canceled
|
||
require.NotEqual(t, database.ProvisionerJobStatusCanceled, pendingJob.JobStatus)
|
||
require.Zero(t, pendingJob.CanceledAt.Time.UTC())
|
||
|
||
// Workspace should not be deleted
|
||
workspace, err := db.GetWorkspaceByID(ctx, prebuild.Workspace.ID)
|
||
require.NoError(t, err)
|
||
require.False(t, workspace.Deleted)
|
||
}
|
||
}
|
||
}
|
||
|
||
// Set the clock to Monday, January 1st, 2024 at 8:00 AM UTC to keep the test deterministic
|
||
clock := quartz.NewMock(t)
|
||
clock.Set(time.Date(2024, 1, 1, 8, 0, 0, 0, time.UTC))
|
||
|
||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
|
||
defer cancel()
|
||
|
||
// Setup
|
||
db, ps := dbtestutil.NewDB(t)
|
||
client, _, _ := coderdtest.NewWithAPI(t, &coderdtest.Options{
|
||
// Explicitly not including provisioner daemons, as we don't want the jobs to be processed
|
||
// Jobs operations will be simulated via the database model
|
||
IncludeProvisionerDaemon: false,
|
||
Database: db,
|
||
Pubsub: ps,
|
||
Clock: clock,
|
||
})
|
||
fakeEnqueuer := newFakeEnqueuer()
|
||
registry := prometheus.NewRegistry()
|
||
cache := files.New(registry, &coderdtest.FakeAuthorizer{})
|
||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
|
||
reconciler := prebuilds.NewStoreReconciler(
|
||
db, ps, cache, codersdk.PrebuildsConfig{}, logger,
|
||
clock,
|
||
registry,
|
||
fakeEnqueuer,
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
owner := coderdtest.CreateFirstUser(t, client)
|
||
|
||
// Given: template A with 2 versions
|
||
// Given: template A version v1: with a preset with 5 instances (2 running, 3 pending)
|
||
templateAID, templateAVersion1ID, templateAVersion1PresetID := createTemplateVersionWithPreset(t, db, owner.OrganizationID, owner.UserID, uuid.Nil, 5)
|
||
templateAVersion1Running := setupPrebuilds(t, db, owner.OrganizationID, templateAID, templateAVersion1ID, templateAVersion1PresetID, 2, false)
|
||
templateAVersion1Pending := setupPrebuilds(t, db, owner.OrganizationID, templateAID, templateAVersion1ID, templateAVersion1PresetID, 3, true)
|
||
// Given: template A version v2 (active version): with a preset with 2 instances (1 running, 1 pending)
|
||
_, templateAVersion2ID, templateAVersion2PresetID := createTemplateVersionWithPreset(t, db, owner.OrganizationID, owner.UserID, templateAID, 2)
|
||
templateAVersion2Running := setupPrebuilds(t, db, owner.OrganizationID, templateAID, templateAVersion2ID, templateAVersion2PresetID, 1, false)
|
||
templateAVersion2Pending := setupPrebuilds(t, db, owner.OrganizationID, templateAID, templateAVersion2ID, templateAVersion2PresetID, 1, true)
|
||
|
||
// Given: template B with 3 versions
|
||
// Given: template B version v1: with a preset with 3 instances (1 running, 2 pending)
|
||
templateBID, templateBVersion1ID, templateBVersion1PresetID := createTemplateVersionWithPreset(t, db, owner.OrganizationID, owner.UserID, uuid.Nil, 3)
|
||
templateBVersion1Running := setupPrebuilds(t, db, owner.OrganizationID, templateBID, templateBVersion1ID, templateBVersion1PresetID, 1, false)
|
||
templateBVersion1Pending := setupPrebuilds(t, db, owner.OrganizationID, templateBID, templateBVersion1ID, templateBVersion1PresetID, 2, true)
|
||
// Given: template B version v2: with a preset with 2 instances (2 pending)
|
||
_, templateBVersion2ID, templateBVersion2PresetID := createTemplateVersionWithPreset(t, db, owner.OrganizationID, owner.UserID, templateBID, 2)
|
||
templateBVersion2Pending := setupPrebuilds(t, db, owner.OrganizationID, templateBID, templateBVersion2ID, templateBVersion2PresetID, 2, true)
|
||
// Given: template B version v3 (active version): with a preset with 2 instances (1 running, 1 pending)
|
||
_, templateBVersion3ID, templateBVersion3PresetID := createTemplateVersionWithPreset(t, db, owner.OrganizationID, owner.UserID, templateBID, 2)
|
||
templateBVersion3Running := setupPrebuilds(t, db, owner.OrganizationID, templateBID, templateBVersion3ID, templateBVersion3PresetID, 1, false)
|
||
templateBVersion3Pending := setupPrebuilds(t, db, owner.OrganizationID, templateBID, templateBVersion3ID, templateBVersion3PresetID, 1, true)
|
||
|
||
// When: the reconciliation loop is executed
|
||
_, err := reconciler.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// Then: template A version 1 running workspaces should not be canceled
|
||
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateAVersion1Running)
|
||
// Then: template A version 1 pending workspaces should be canceled
|
||
checkIfJobCanceledAndDeleted(t, clock, ctx, db, true, templateAVersion1Pending)
|
||
// Then: template A version 2 running and pending workspaces should not be canceled
|
||
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateAVersion2Running)
|
||
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateAVersion2Pending)
|
||
|
||
// Then: template B version 1 running workspaces should not be canceled
|
||
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateBVersion1Running)
|
||
// Then: template B version 1 pending workspaces should be canceled
|
||
checkIfJobCanceledAndDeleted(t, clock, ctx, db, true, templateBVersion1Pending)
|
||
// Then: template B version 2 pending workspaces should be canceled
|
||
checkIfJobCanceledAndDeleted(t, clock, ctx, db, true, templateBVersion2Pending)
|
||
// Then: template B version 3 running and pending workspaces should not be canceled
|
||
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateBVersion3Running)
|
||
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateBVersion3Pending)
|
||
})
|
||
}
|
||
|
||
func TestReconciliationStats(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
// Setup
|
||
clock := quartz.NewReal()
|
||
db, ps := dbtestutil.NewDB(t)
|
||
client, _, _ := coderdtest.NewWithAPI(t, &coderdtest.Options{
|
||
Database: db,
|
||
Pubsub: ps,
|
||
Clock: clock,
|
||
})
|
||
fakeEnqueuer := newFakeEnqueuer()
|
||
registry := prometheus.NewRegistry()
|
||
cache := files.New(registry, &coderdtest.FakeAuthorizer{})
|
||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
|
||
reconciler := prebuilds.NewStoreReconciler(
|
||
db, ps, cache, codersdk.PrebuildsConfig{}, logger,
|
||
clock,
|
||
registry,
|
||
fakeEnqueuer,
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
owner := coderdtest.CreateFirstUser(t, client)
|
||
|
||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
||
defer cancel()
|
||
|
||
// Create a template version with a preset
|
||
dbfake.TemplateVersion(t, db).Seed(database.TemplateVersion{
|
||
OrganizationID: owner.OrganizationID,
|
||
CreatedBy: owner.UserID,
|
||
}).Preset(database.TemplateVersionPreset{
|
||
DesiredInstances: sql.NullInt32{
|
||
Int32: 1,
|
||
Valid: true,
|
||
},
|
||
}).Do()
|
||
|
||
// Verify that ReconcileAll tracks and returns elapsed time
|
||
start := time.Now()
|
||
stats, err := reconciler.ReconcileAll(ctx)
|
||
actualElapsed := time.Since(start)
|
||
require.NoError(t, err)
|
||
require.Greater(t, stats.Elapsed, time.Duration(0))
|
||
|
||
// Verify stats.Elapsed matches actual execution time
|
||
require.InDelta(t, actualElapsed.Milliseconds(), stats.Elapsed.Milliseconds(), 100)
|
||
// Verify reconciliation loop is not unexpectedly slow
|
||
require.Less(t, stats.Elapsed, 5*time.Second)
|
||
}
|
||
|
||
func newNoopEnqueuer() *notifications.NoopEnqueuer {
|
||
return notifications.NewNoopEnqueuer()
|
||
}
|
||
|
||
func newFakeEnqueuer() *notificationstest.FakeEnqueuer {
|
||
return notificationstest.NewFakeEnqueuer()
|
||
}
|
||
|
||
func newNoopUsageCheckerPtr() *atomic.Pointer[wsbuilder.UsageChecker] {
|
||
var noopUsageChecker wsbuilder.UsageChecker = wsbuilder.NoopUsageChecker{}
|
||
buildUsageChecker := atomic.Pointer[wsbuilder.UsageChecker]{}
|
||
buildUsageChecker.Store(&noopUsageChecker)
|
||
return &buildUsageChecker
|
||
}
|
||
|
||
// nolint:revive // It's a control flag, but this is a test.
|
||
func setupTestDBTemplate(
|
||
t *testing.T,
|
||
db database.Store,
|
||
userID uuid.UUID,
|
||
templateDeleted bool,
|
||
) (
|
||
database.Organization,
|
||
database.Template,
|
||
) {
|
||
t.Helper()
|
||
org := dbgen.Organization(t, db, database.Organization{})
|
||
|
||
template := dbgen.Template(t, db, database.Template{
|
||
CreatedBy: userID,
|
||
OrganizationID: org.ID,
|
||
CreatedAt: time.Now().Add(muchEarlier),
|
||
})
|
||
if templateDeleted {
|
||
ctx := testutil.Context(t, testutil.WaitShort)
|
||
require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
|
||
ID: template.ID,
|
||
Deleted: true,
|
||
}))
|
||
}
|
||
return org, template
|
||
}
|
||
|
||
// nolint:revive // It's a control flag, but this is a test.
|
||
func setupTestDBTemplateWithinOrg(
|
||
t *testing.T,
|
||
db database.Store,
|
||
userID uuid.UUID,
|
||
templateDeleted bool,
|
||
templateName string,
|
||
org database.Organization,
|
||
) database.Template {
|
||
t.Helper()
|
||
|
||
template := dbgen.Template(t, db, database.Template{
|
||
Name: templateName,
|
||
CreatedBy: userID,
|
||
OrganizationID: org.ID,
|
||
CreatedAt: time.Now().Add(muchEarlier),
|
||
})
|
||
if templateDeleted {
|
||
ctx := testutil.Context(t, testutil.WaitShort)
|
||
require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
|
||
ID: template.ID,
|
||
Deleted: true,
|
||
}))
|
||
}
|
||
return template
|
||
}
|
||
|
||
// Negative offsets that the setup helpers add to "now" to backdate seeded rows.
const (
	// earlier is one hour in the past.
	earlier = -1 * time.Hour
	// muchEarlier is two hours in the past.
	muchEarlier = -2 * time.Hour
)
|
||
|
||
func setupTestDBTemplateVersion(
|
||
ctx context.Context,
|
||
t *testing.T,
|
||
clock quartz.Clock,
|
||
db database.Store,
|
||
ps pubsub.Pubsub,
|
||
orgID uuid.UUID,
|
||
userID uuid.UUID,
|
||
templateID uuid.UUID,
|
||
) uuid.UUID {
|
||
t.Helper()
|
||
templateVersionJob := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
|
||
CreatedAt: clock.Now().Add(muchEarlier),
|
||
CompletedAt: sql.NullTime{Time: clock.Now().Add(earlier), Valid: true},
|
||
OrganizationID: orgID,
|
||
InitiatorID: userID,
|
||
})
|
||
templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: templateID, Valid: true},
|
||
OrganizationID: orgID,
|
||
CreatedBy: userID,
|
||
JobID: templateVersionJob.ID,
|
||
CreatedAt: time.Now().Add(muchEarlier),
|
||
})
|
||
require.NoError(t, db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
|
||
ID: templateID,
|
||
ActiveVersionID: templateVersion.ID,
|
||
}))
|
||
// Make sure immutable params don't break prebuilt workspace deletion logic
|
||
dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
|
||
TemplateVersionID: templateVersion.ID,
|
||
Name: "test",
|
||
Description: "required & immutable param",
|
||
Type: "string",
|
||
DefaultValue: "",
|
||
Required: true,
|
||
Mutable: false,
|
||
})
|
||
return templateVersion.ID
|
||
}
|
||
|
||
// Preset optional parameters.
|
||
// presetOptions defines a function type for modifying InsertPresetParams.
|
||
type presetOptions func(*database.InsertPresetParams)
|
||
|
||
// withTTL returns a presetOptions function that sets the invalidate_after_secs (TTL) field in InsertPresetParams.
|
||
func withTTL(ttl int32) presetOptions {
|
||
return func(p *database.InsertPresetParams) {
|
||
p.InvalidateAfterSecs = sql.NullInt32{Valid: true, Int32: ttl}
|
||
}
|
||
}
|
||
|
||
func setupTestDBPreset(
|
||
t *testing.T,
|
||
db database.Store,
|
||
templateVersionID uuid.UUID,
|
||
desiredInstances int32,
|
||
presetName string,
|
||
opts ...presetOptions,
|
||
) database.TemplateVersionPreset {
|
||
t.Helper()
|
||
insertPresetParams := database.InsertPresetParams{
|
||
TemplateVersionID: templateVersionID,
|
||
Name: presetName,
|
||
DesiredInstances: sql.NullInt32{
|
||
Valid: true,
|
||
Int32: desiredInstances,
|
||
},
|
||
}
|
||
|
||
// Apply optional parameters to insertPresetParams (e.g., TTL).
|
||
for _, opt := range opts {
|
||
opt(&insertPresetParams)
|
||
}
|
||
|
||
preset := dbgen.Preset(t, db, insertPresetParams)
|
||
|
||
dbgen.PresetParameter(t, db, database.InsertPresetParametersParams{
|
||
TemplateVersionPresetID: preset.ID,
|
||
Names: []string{"test"},
|
||
Values: []string{"test"},
|
||
})
|
||
return preset
|
||
}
|
||
|
||
func setupTestDBPresetWithScheduling(
|
||
t *testing.T,
|
||
db database.Store,
|
||
templateVersionID uuid.UUID,
|
||
desiredInstances int32,
|
||
presetName string,
|
||
schedulingTimezone string,
|
||
) database.TemplateVersionPreset {
|
||
t.Helper()
|
||
preset := dbgen.Preset(t, db, database.InsertPresetParams{
|
||
TemplateVersionID: templateVersionID,
|
||
Name: presetName,
|
||
DesiredInstances: sql.NullInt32{
|
||
Valid: true,
|
||
Int32: desiredInstances,
|
||
},
|
||
SchedulingTimezone: schedulingTimezone,
|
||
})
|
||
dbgen.PresetParameter(t, db, database.InsertPresetParametersParams{
|
||
TemplateVersionPresetID: preset.ID,
|
||
Names: []string{"test"},
|
||
Values: []string{"test"},
|
||
})
|
||
return preset
|
||
}
|
||
|
||
func setupTestDBPrebuild(
|
||
t *testing.T,
|
||
clock quartz.Clock,
|
||
db database.Store,
|
||
ps pubsub.Pubsub,
|
||
transition database.WorkspaceTransition,
|
||
prebuildStatus database.ProvisionerJobStatus,
|
||
orgID uuid.UUID,
|
||
preset database.TemplateVersionPreset,
|
||
templateID uuid.UUID,
|
||
templateVersionID uuid.UUID,
|
||
) (database.WorkspaceTable, database.WorkspaceBuild) {
|
||
t.Helper()
|
||
return setupTestDBWorkspace(t, clock, db, ps, transition, prebuildStatus, orgID, preset, templateID, templateVersionID, database.PrebuildsSystemUserID, database.PrebuildsSystemUserID)
|
||
}
|
||
|
||
func setupTestDBWorkspace(
|
||
t *testing.T,
|
||
clock quartz.Clock,
|
||
db database.Store,
|
||
ps pubsub.Pubsub,
|
||
transition database.WorkspaceTransition,
|
||
prebuildStatus database.ProvisionerJobStatus,
|
||
orgID uuid.UUID,
|
||
preset database.TemplateVersionPreset,
|
||
templateID uuid.UUID,
|
||
templateVersionID uuid.UUID,
|
||
initiatorID uuid.UUID,
|
||
ownerID uuid.UUID,
|
||
) (database.WorkspaceTable, database.WorkspaceBuild) {
|
||
t.Helper()
|
||
cancelledAt := sql.NullTime{}
|
||
completedAt := sql.NullTime{}
|
||
|
||
startedAt := sql.NullTime{}
|
||
if prebuildStatus != database.ProvisionerJobStatusPending {
|
||
startedAt = sql.NullTime{Time: clock.Now().Add(muchEarlier), Valid: true}
|
||
}
|
||
|
||
buildError := sql.NullString{}
|
||
if prebuildStatus == database.ProvisionerJobStatusFailed {
|
||
completedAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
|
||
buildError = sql.NullString{String: "build failed", Valid: true}
|
||
}
|
||
|
||
switch prebuildStatus {
|
||
case database.ProvisionerJobStatusCanceling:
|
||
cancelledAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
|
||
case database.ProvisionerJobStatusCanceled:
|
||
completedAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
|
||
cancelledAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
|
||
case database.ProvisionerJobStatusSucceeded:
|
||
completedAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
|
||
default:
|
||
}
|
||
|
||
createdAt := clock.Now().Add(muchEarlier)
|
||
|
||
workspace := dbgen.Workspace(t, db, database.WorkspaceTable{
|
||
TemplateID: templateID,
|
||
OrganizationID: orgID,
|
||
OwnerID: ownerID,
|
||
Deleted: false,
|
||
CreatedAt: createdAt,
|
||
})
|
||
job := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
|
||
InitiatorID: initiatorID,
|
||
CreatedAt: createdAt,
|
||
StartedAt: startedAt,
|
||
CompletedAt: completedAt,
|
||
CanceledAt: cancelledAt,
|
||
OrganizationID: orgID,
|
||
Error: buildError,
|
||
})
|
||
workspaceBuild := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
|
||
WorkspaceID: workspace.ID,
|
||
InitiatorID: initiatorID,
|
||
TemplateVersionID: templateVersionID,
|
||
JobID: job.ID,
|
||
TemplateVersionPresetID: uuid.NullUUID{UUID: preset.ID, Valid: true},
|
||
Transition: transition,
|
||
CreatedAt: clock.Now(),
|
||
})
|
||
dbgen.WorkspaceBuildParameters(t, db, []database.WorkspaceBuildParameter{
|
||
{
|
||
WorkspaceBuildID: workspaceBuild.ID,
|
||
Name: "test",
|
||
Value: "test",
|
||
},
|
||
})
|
||
|
||
return workspace, workspaceBuild
|
||
}
|
||
|
||
// nolint:revive // It's a control flag, but this is a test.
|
||
func setupTestDBWorkspaceAgent(t *testing.T, db database.Store, workspaceID uuid.UUID, eligible bool) database.WorkspaceAgent {
|
||
build, err := db.GetLatestWorkspaceBuildByWorkspaceID(t.Context(), workspaceID)
|
||
require.NoError(t, err)
|
||
|
||
res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: build.JobID})
|
||
agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
||
ResourceID: res.ID,
|
||
})
|
||
|
||
// A prebuilt workspace is considered eligible when its agent is in a "ready" lifecycle state.
|
||
// i.e. connected to the control plane and all startup scripts have run.
|
||
if eligible {
|
||
require.NoError(t, db.UpdateWorkspaceAgentLifecycleStateByID(t.Context(), database.UpdateWorkspaceAgentLifecycleStateByIDParams{
|
||
ID: agent.ID,
|
||
LifecycleState: database.WorkspaceAgentLifecycleStateReady,
|
||
StartedAt: sql.NullTime{Time: dbtime.Now().Add(-time.Minute), Valid: true},
|
||
ReadyAt: sql.NullTime{Time: dbtime.Now(), Valid: true},
|
||
}))
|
||
}
|
||
|
||
return agent
|
||
}
|
||
|
||
// setupTestDBAntagonists creates test antagonists that should not influence running prebuild workspace tests.
|
||
// 1. A stopped prebuilt workspace (STOP then START transitions, owned by
|
||
// prebuilds system user).
|
||
// 2. A running regular workspace (not owned by the prebuilds system user).
|
||
func setupTestDBPrebuildAntagonists(t *testing.T, db database.Store, ps pubsub.Pubsub, org database.Organization) {
|
||
t.Helper()
|
||
|
||
templateAdmin := dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleTemplateAdmin}})
|
||
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{
|
||
OrganizationID: org.ID,
|
||
UserID: templateAdmin.ID,
|
||
})
|
||
member := dbgen.User(t, db, database.User{})
|
||
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{
|
||
OrganizationID: org.ID,
|
||
UserID: member.ID,
|
||
})
|
||
tpl := dbgen.Template(t, db, database.Template{
|
||
OrganizationID: org.ID,
|
||
CreatedBy: templateAdmin.ID,
|
||
})
|
||
tv := dbgen.TemplateVersion(t, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true},
|
||
OrganizationID: org.ID,
|
||
CreatedBy: templateAdmin.ID,
|
||
})
|
||
|
||
// 1) Stopped prebuilt workspace (owned by prebuilds system user)
|
||
stoppedPrebuild := dbgen.Workspace(t, db, database.WorkspaceTable{
|
||
OwnerID: database.PrebuildsSystemUserID,
|
||
TemplateID: tpl.ID,
|
||
Name: "prebuild-antagonist-stopped",
|
||
Deleted: false,
|
||
})
|
||
|
||
// STOP build (build number 2, most recent)
|
||
stoppedJob2 := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
|
||
OrganizationID: org.ID,
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
Provisioner: database.ProvisionerTypeEcho,
|
||
Type: database.ProvisionerJobTypeWorkspaceBuild,
|
||
StartedAt: sql.NullTime{Time: dbtime.Now().Add(-30 * time.Second), Valid: true},
|
||
CompletedAt: sql.NullTime{Time: dbtime.Now().Add(-20 * time.Second), Valid: true},
|
||
Error: sql.NullString{},
|
||
ErrorCode: sql.NullString{},
|
||
})
|
||
dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
|
||
WorkspaceID: stoppedPrebuild.ID,
|
||
TemplateVersionID: tv.ID,
|
||
JobID: stoppedJob2.ID,
|
||
BuildNumber: 2,
|
||
Transition: database.WorkspaceTransitionStop,
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
Reason: database.BuildReasonInitiator,
|
||
// Explicitly not using a preset here. This shouldn't normally be possible,
|
||
// but without this the reconciler will try to create a new prebuild for
|
||
// this preset, which will affect the tests.
|
||
TemplateVersionPresetID: uuid.NullUUID{},
|
||
})
|
||
|
||
// START build (build number 1, older)
|
||
stoppedJob1 := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
|
||
OrganizationID: org.ID,
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
Provisioner: database.ProvisionerTypeEcho,
|
||
Type: database.ProvisionerJobTypeWorkspaceBuild,
|
||
StartedAt: sql.NullTime{Time: dbtime.Now().Add(-60 * time.Second), Valid: true},
|
||
CompletedAt: sql.NullTime{Time: dbtime.Now().Add(-50 * time.Second), Valid: true},
|
||
Error: sql.NullString{},
|
||
ErrorCode: sql.NullString{},
|
||
})
|
||
dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
|
||
WorkspaceID: stoppedPrebuild.ID,
|
||
TemplateVersionID: tv.ID,
|
||
JobID: stoppedJob1.ID,
|
||
BuildNumber: 1,
|
||
Transition: database.WorkspaceTransitionStart,
|
||
InitiatorID: database.PrebuildsSystemUserID,
|
||
Reason: database.BuildReasonInitiator,
|
||
})
|
||
|
||
// 2) Running regular workspace (not owned by prebuilds system user)
|
||
regularWorkspace := dbgen.Workspace(t, db, database.WorkspaceTable{
|
||
OwnerID: member.ID,
|
||
TemplateID: tpl.ID,
|
||
Name: "antagonist-regular-workspace",
|
||
Deleted: false,
|
||
})
|
||
regularJob := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
|
||
OrganizationID: org.ID,
|
||
InitiatorID: member.ID,
|
||
Provisioner: database.ProvisionerTypeEcho,
|
||
Type: database.ProvisionerJobTypeWorkspaceBuild,
|
||
StartedAt: sql.NullTime{Time: dbtime.Now().Add(-40 * time.Second), Valid: true},
|
||
CompletedAt: sql.NullTime{Time: dbtime.Now().Add(-30 * time.Second), Valid: true},
|
||
Error: sql.NullString{},
|
||
ErrorCode: sql.NullString{},
|
||
})
|
||
dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
|
||
WorkspaceID: regularWorkspace.ID,
|
||
TemplateVersionID: tv.ID,
|
||
JobID: regularJob.ID,
|
||
BuildNumber: 1,
|
||
Transition: database.WorkspaceTransitionStart,
|
||
InitiatorID: member.ID,
|
||
Reason: database.BuildReasonInitiator,
|
||
})
|
||
}
|
||
|
||
var allTransitions = []database.WorkspaceTransition{
|
||
database.WorkspaceTransitionStart,
|
||
database.WorkspaceTransitionStop,
|
||
database.WorkspaceTransitionDelete,
|
||
}
|
||
|
||
var allJobStatuses = []database.ProvisionerJobStatus{
|
||
database.ProvisionerJobStatusPending,
|
||
database.ProvisionerJobStatusRunning,
|
||
database.ProvisionerJobStatusSucceeded,
|
||
database.ProvisionerJobStatusFailed,
|
||
database.ProvisionerJobStatusCanceled,
|
||
database.ProvisionerJobStatusCanceling,
|
||
}
|
||
|
||
func allJobStatusesExcept(except ...database.ProvisionerJobStatus) []database.ProvisionerJobStatus {
|
||
return slice.Filter(except, func(status database.ProvisionerJobStatus) bool {
|
||
return !slice.Contains(allJobStatuses, status)
|
||
})
|
||
}
|
||
|
||
func mustParseTime(t *testing.T, layout, value string) time.Time {
|
||
t.Helper()
|
||
parsedTime, err := time.Parse(layout, value)
|
||
require.NoError(t, err)
|
||
return parsedTime
|
||
}
|
||
|
||
func TestReconciliationRespectsPauseSetting(t *testing.T) {
|
||
t.Parallel()
|
||
|
||
ctx := testutil.Context(t, testutil.WaitLong)
|
||
clock := quartz.NewMock(t)
|
||
db, ps := dbtestutil.NewDB(t)
|
||
cfg := codersdk.PrebuildsConfig{
|
||
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
|
||
}
|
||
logger := testutil.Logger(t)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
reconciler := prebuilds.NewStoreReconciler(
|
||
db, ps, cache, cfg, logger,
|
||
clock,
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
10,
|
||
nil,
|
||
)
|
||
|
||
// Setup a template with a preset that should create prebuilds
|
||
org := dbgen.Organization(t, db, database.Organization{})
|
||
user := dbgen.User(t, db, database.User{})
|
||
template := dbgen.Template(t, db, database.Template{
|
||
CreatedBy: user.ID,
|
||
OrganizationID: org.ID,
|
||
})
|
||
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, ps, org.ID, user.ID, template.ID)
|
||
_ = setupTestDBPreset(t, db, templateVersionID, 2, "test")
|
||
|
||
// Initially, reconciliation should create prebuilds
|
||
_, err := reconciler.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// Verify that prebuilds were created
|
||
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
require.Len(t, workspaces, 2, "should have created 2 prebuilds")
|
||
|
||
// Now pause prebuilds reconciliation
|
||
err = prebuilds.SetPrebuildsReconciliationPaused(ctx, db, true)
|
||
require.NoError(t, err)
|
||
|
||
// Delete the existing prebuilds to simulate a scenario where reconciliation would normally recreate them
|
||
for _, workspace := range workspaces {
|
||
err = db.UpdateWorkspaceDeletedByID(ctx, database.UpdateWorkspaceDeletedByIDParams{
|
||
ID: workspace.ID,
|
||
Deleted: true,
|
||
})
|
||
require.NoError(t, err)
|
||
}
|
||
|
||
// Verify prebuilds are deleted
|
||
workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
require.Len(t, workspaces, 0, "prebuilds should be deleted")
|
||
|
||
// Run reconciliation again - it should be paused and not recreate prebuilds
|
||
_, err = reconciler.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// Verify that no new prebuilds were created because reconciliation is paused
|
||
workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
require.Len(t, workspaces, 0, "should not create prebuilds when reconciliation is paused")
|
||
|
||
// Resume prebuilds reconciliation
|
||
err = prebuilds.SetPrebuildsReconciliationPaused(ctx, db, false)
|
||
require.NoError(t, err)
|
||
|
||
// Run reconciliation again - it should now recreate the prebuilds
|
||
_, err = reconciler.ReconcileAll(ctx)
|
||
require.NoError(t, err)
|
||
|
||
// Verify that prebuilds were recreated
|
||
workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
|
||
require.NoError(t, err)
|
||
require.Len(t, workspaces, 2, "should have recreated 2 prebuilds after resuming")
|
||
}
|
||
|
||
// BenchmarkReconcileAll_NoOps benchmarks the reconciliation loop with varying numbers
|
||
// of presets of inactive versions that require no reconciliation actions.
|
||
//
|
||
// This validates the performance benefit of the CanSkipReconciliation optimization,
|
||
// which avoids spawning goroutines for presets that don't need reconciliation actions.
|
||
//
|
||
// go test -bench='^BenchmarkReconcileAll_NoOps$' -run=^$ -benchtime=5x -count=2 ./enterprise/coderd/prebuilds/
|
||
func BenchmarkReconcileAll_NoOps(b *testing.B) {
|
||
benchCases := []struct {
|
||
name string
|
||
presetCount int
|
||
}{
|
||
{"100_presets", 100},
|
||
{"1000_presets", 1000},
|
||
{"5000_presets", 5000},
|
||
}
|
||
|
||
for _, bc := range benchCases {
|
||
b.Run(bc.name, func(b *testing.B) {
|
||
// Setup
|
||
ctx := context.Background()
|
||
logger := slog.Make()
|
||
db, ps, sqlDB := dbtestutil.NewDBWithSQLDB(b, dbtestutil.WithLogger(logger))
|
||
|
||
// Database configuration set per replica (see cli/server.go).
|
||
// Default value for CODER_PG_CONN_MAX_OPEN is 10.
|
||
maxOpenConns := 10
|
||
sqlDB.SetMaxOpenConns(maxOpenConns)
|
||
sqlDB.SetMaxIdleConns(3)
|
||
|
||
clock := quartz.NewMock(b).WithLogger(quartz.NoOpLogger)
|
||
cfg := codersdk.PrebuildsConfig{
|
||
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
|
||
}
|
||
prebuildsLogger := slogtest.Make(b, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelError)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, ps, cache, cfg, prebuildsLogger,
|
||
clock,
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
maxOpenConns,
|
||
nil,
|
||
)
|
||
|
||
org := dbgen.Organization(b, db, database.Organization{})
|
||
user := dbgen.User(b, db, database.User{})
|
||
|
||
for i := 0; i < bc.presetCount; i++ {
|
||
template := dbgen.Template(b, db, database.Template{
|
||
CreatedBy: user.ID,
|
||
OrganizationID: org.ID,
|
||
})
|
||
|
||
oldTV := dbgen.TemplateVersion(b, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
|
||
OrganizationID: org.ID,
|
||
CreatedBy: user.ID,
|
||
})
|
||
dbgen.Preset(b, db, database.InsertPresetParams{
|
||
TemplateVersionID: oldTV.ID,
|
||
Name: "default",
|
||
DesiredInstances: sql.NullInt32{Int32: 2, Valid: true},
|
||
})
|
||
|
||
// Create new version without preset and make it active
|
||
newTV := dbgen.TemplateVersion(b, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
|
||
OrganizationID: org.ID,
|
||
CreatedBy: user.ID,
|
||
})
|
||
err := db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
|
||
ID: template.ID,
|
||
ActiveVersionID: newTV.ID,
|
||
})
|
||
require.NoError(b, err)
|
||
}
|
||
|
||
// Verify setup: all presets should be inactive with no work
|
||
// Get all presets from all templates
|
||
presets, err := db.GetTemplatePresetsWithPrebuilds(ctx, uuid.NullUUID{})
|
||
require.NoError(b, err)
|
||
require.Len(b, presets, bc.presetCount)
|
||
|
||
// Should have no prebuilt workspaces
|
||
workspaces, err := db.GetWorkspaces(ctx, database.GetWorkspacesParams{
|
||
OwnerID: database.PrebuildsSystemUserID,
|
||
})
|
||
require.NoError(b, err)
|
||
require.Empty(b, workspaces)
|
||
|
||
// Benchmark the reconciliation loop
|
||
b.ResetTimer()
|
||
for i := 0; i < b.N; i++ {
|
||
stats, err := controller.ReconcileAll(ctx)
|
||
require.NoError(b, err)
|
||
_ = stats
|
||
}
|
||
})
|
||
}
|
||
}
|
||
|
||
// BenchmarkReconcileAll_ConnectionContention benchmarks the reconciliation loop with varying
|
||
// levels of database connection contention.
|
||
//
|
||
// This measures reconciliation time under heavy database load, where each preset
|
||
// needs to create multiple prebuilt workspaces.
|
||
//
|
||
// go test -bench='^BenchmarkReconcileAll_ConnectionContention$' -run=^$ -benchtime=5x -count=2 ./enterprise/coderd/prebuilds/
|
||
func BenchmarkReconcileAll_ConnectionContention(b *testing.B) {
|
||
benchCases := []struct {
|
||
name string
|
||
presetsForReconciliation int
|
||
desiredInstances int32
|
||
}{
|
||
{"10_presets_5_instances", 10, 5}, // 50 creates
|
||
{"50_presets_5_instances", 50, 5}, // 250 creates
|
||
{"100_presets_5_instances", 100, 5}, // 500 creates
|
||
{"1000_presets_10_instances", 1000, 10}, // 10000 creates
|
||
}
|
||
|
||
for _, bc := range benchCases {
|
||
b.Run(bc.name, func(b *testing.B) {
|
||
for i := 0; i < b.N; i++ {
|
||
b.StopTimer()
|
||
|
||
// Setup: Create a fresh database for each iteration because ReconcileAll
|
||
// creates prebuilds on the first run. Subsequent runs would see those
|
||
// prebuilds as "in progress" and skip creating new ones, making the
|
||
// benchmark results inconsistent.
|
||
ctx := context.Background()
|
||
logger := slog.Make()
|
||
db, ps, sqlDB := dbtestutil.NewDBWithSQLDB(b, dbtestutil.WithLogger(logger))
|
||
|
||
// Database configuration set per replica (see cli/server.go).
|
||
// Default value for CODER_PG_CONN_MAX_OPEN is 10.
|
||
maxOpenConns := 10
|
||
sqlDB.SetMaxOpenConns(maxOpenConns)
|
||
sqlDB.SetMaxIdleConns(3)
|
||
|
||
clock := quartz.NewMock(b).WithLogger(quartz.NoOpLogger)
|
||
cfg := codersdk.PrebuildsConfig{
|
||
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
|
||
}
|
||
prebuildsLogger := slogtest.Make(b, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelError)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, ps, cache, cfg, prebuildsLogger,
|
||
clock,
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
maxOpenConns,
|
||
nil,
|
||
)
|
||
|
||
// Create presets from active template versions that need reconciliation actions
|
||
org := dbgen.Organization(b, db, database.Organization{})
|
||
user := dbgen.User(b, db, database.User{})
|
||
|
||
for p := 0; p < bc.presetsForReconciliation; p++ {
|
||
template := dbgen.Template(b, db, database.Template{
|
||
CreatedBy: user.ID,
|
||
OrganizationID: org.ID,
|
||
})
|
||
|
||
// Create a completed provisioner job for the template version.
|
||
// This is needed because workspace builds copy the StorageMethod and FileID
|
||
// from the template version's import job to know which Terraform files to use.
|
||
file := dbgen.File(b, db, database.File{
|
||
CreatedBy: user.ID,
|
||
Hash: uuid.NewString(), // Generate unique hash for each file
|
||
})
|
||
templateVersionJob := dbgen.ProvisionerJob(b, db, ps, database.ProvisionerJob{
|
||
OrganizationID: org.ID,
|
||
InitiatorID: user.ID,
|
||
FileID: file.ID,
|
||
StorageMethod: database.ProvisionerStorageMethodFile,
|
||
Type: database.ProvisionerJobTypeTemplateVersionImport,
|
||
CompletedAt: sql.NullTime{Time: clock.Now(), Valid: true},
|
||
})
|
||
|
||
tv := dbgen.TemplateVersion(b, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
|
||
OrganizationID: org.ID,
|
||
CreatedBy: user.ID,
|
||
JobID: templateVersionJob.ID,
|
||
})
|
||
|
||
dbgen.Preset(b, db, database.InsertPresetParams{
|
||
TemplateVersionID: tv.ID,
|
||
Name: "default",
|
||
DesiredInstances: sql.NullInt32{Int32: bc.desiredInstances, Valid: true},
|
||
})
|
||
|
||
// Make this the active version
|
||
err := db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
|
||
ID: template.ID,
|
||
ActiveVersionID: tv.ID,
|
||
})
|
||
require.NoError(b, err)
|
||
}
|
||
|
||
// Verify setup: all presets should require reconciliation
|
||
// Get all presets from all templates
|
||
presets, err := db.GetTemplatePresetsWithPrebuilds(ctx, uuid.NullUUID{})
|
||
require.NoError(b, err)
|
||
require.Len(b, presets, bc.presetsForReconciliation)
|
||
|
||
b.StartTimer()
|
||
|
||
// Measure reconciliation
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(b, err)
|
||
|
||
b.StopTimer()
|
||
}
|
||
})
|
||
}
|
||
}
|
||
|
||
// BenchmarkReconcileAll_Mix benchmarks reconciliation performance when there are
|
||
// many total presets in the database, but only a small subset are active and need reconciliation.
|
||
//
|
||
// This validates that the reconciler efficiently filters to only active template versions and
|
||
// doesn't slow down proportionally with the total number of inactive presets.
|
||
//
|
||
// go test -bench='^BenchmarkReconcileAll_Mix$' -run=^$ -benchtime=5x -count=2 ./enterprise/coderd/prebuilds/
|
||
func BenchmarkReconcileAll_Mix(b *testing.B) {
|
||
benchCases := []struct {
|
||
name string
|
||
inactivePresetsCount int // Presets on inactive template versions (noise)
|
||
activePresetsCount int // Presets on active versions that need work
|
||
desiredInstances int32 // Desired prebuilds per preset
|
||
}{
|
||
{"500_total_10_active", 490, 10, 2}, // 20 creates
|
||
{"1000_total_25_active", 975, 25, 2}, // 50 creates
|
||
{"5000_total_50_active", 4950, 50, 2}, // 100 creates
|
||
}
|
||
|
||
for _, bc := range benchCases {
|
||
b.Run(bc.name, func(b *testing.B) {
|
||
for i := 0; i < b.N; i++ {
|
||
b.StopTimer()
|
||
|
||
// Setup: Create a fresh database for each iteration because ReconcileAll
|
||
// creates prebuilds on the first run. Subsequent runs would see those
|
||
// prebuilds as "in progress" and skip creating new ones, making the
|
||
// benchmark results inconsistent.
|
||
ctx := context.Background()
|
||
logger := slog.Make()
|
||
db, ps, sqlDB := dbtestutil.NewDBWithSQLDB(b, dbtestutil.WithLogger(logger))
|
||
|
||
// Database configuration set per replica (see cli/server.go).
|
||
// Default value for CODER_PG_CONN_MAX_OPEN is 10.
|
||
maxOpenConns := 10
|
||
sqlDB.SetMaxOpenConns(maxOpenConns)
|
||
sqlDB.SetMaxIdleConns(3)
|
||
|
||
clock := quartz.NewMock(b).WithLogger(quartz.NoOpLogger)
|
||
cfg := codersdk.PrebuildsConfig{
|
||
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
|
||
}
|
||
prebuildsLogger := slogtest.Make(b, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelError)
|
||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||
controller := prebuilds.NewStoreReconciler(
|
||
db, ps, cache, cfg, prebuildsLogger,
|
||
clock,
|
||
prometheus.NewRegistry(),
|
||
newNoopEnqueuer(),
|
||
newNoopUsageCheckerPtr(),
|
||
noop.NewTracerProvider(),
|
||
maxOpenConns,
|
||
nil,
|
||
)
|
||
|
||
org := dbgen.Organization(b, db, database.Organization{})
|
||
user := dbgen.User(b, db, database.User{})
|
||
|
||
// Create inactive presets (noise that should be filtered out efficiently)
|
||
// These are on templates with inactive versions
|
||
for p := 0; p < bc.inactivePresetsCount; p++ {
|
||
template := dbgen.Template(b, db, database.Template{
|
||
CreatedBy: user.ID,
|
||
OrganizationID: org.ID,
|
||
})
|
||
|
||
file := dbgen.File(b, db, database.File{
|
||
CreatedBy: user.ID,
|
||
Hash: fmt.Sprintf("inactive-%d", p),
|
||
})
|
||
|
||
templateVersionJob := dbgen.ProvisionerJob(b, db, ps, database.ProvisionerJob{
|
||
OrganizationID: org.ID,
|
||
InitiatorID: user.ID,
|
||
FileID: file.ID,
|
||
StorageMethod: database.ProvisionerStorageMethodFile,
|
||
Type: database.ProvisionerJobTypeTemplateVersionImport,
|
||
CompletedAt: sql.NullTime{Time: clock.Now(), Valid: true},
|
||
})
|
||
|
||
inactiveVersion := dbgen.TemplateVersion(b, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
|
||
OrganizationID: org.ID,
|
||
CreatedBy: user.ID,
|
||
JobID: templateVersionJob.ID,
|
||
Name: fmt.Sprintf("inactive-v%d", p),
|
||
})
|
||
|
||
// Create presets on this inactive version
|
||
dbgen.Preset(b, db, database.InsertPresetParams{
|
||
TemplateVersionID: inactiveVersion.ID,
|
||
Name: "default",
|
||
DesiredInstances: sql.NullInt32{Int32: 2, Valid: true},
|
||
})
|
||
|
||
// Create a newer active version (making the above version inactive)
|
||
newerFile := dbgen.File(b, db, database.File{
|
||
CreatedBy: user.ID,
|
||
Hash: fmt.Sprintf("active-no-preset-%d", p),
|
||
})
|
||
|
||
newerJob := dbgen.ProvisionerJob(b, db, ps, database.ProvisionerJob{
|
||
OrganizationID: org.ID,
|
||
InitiatorID: user.ID,
|
||
FileID: newerFile.ID,
|
||
StorageMethod: database.ProvisionerStorageMethodFile,
|
||
Type: database.ProvisionerJobTypeTemplateVersionImport,
|
||
CompletedAt: sql.NullTime{Time: clock.Now(), Valid: true},
|
||
})
|
||
|
||
activeVersion := dbgen.TemplateVersion(b, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
|
||
OrganizationID: org.ID,
|
||
CreatedBy: user.ID,
|
||
JobID: newerJob.ID,
|
||
Name: fmt.Sprintf("active-v%d", p),
|
||
})
|
||
|
||
// Make the newer version active (no presets = no reconciliation work)
|
||
err := db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
|
||
ID: template.ID,
|
||
ActiveVersionID: activeVersion.ID,
|
||
})
|
||
require.NoError(b, err)
|
||
}
|
||
|
||
// Create active presets that need reconciliation (missing prebuilds)
|
||
for p := 0; p < bc.activePresetsCount; p++ {
|
||
template := dbgen.Template(b, db, database.Template{
|
||
CreatedBy: user.ID,
|
||
OrganizationID: org.ID,
|
||
Name: fmt.Sprintf("needs-work-%d", p),
|
||
})
|
||
|
||
file := dbgen.File(b, db, database.File{
|
||
CreatedBy: user.ID,
|
||
Hash: fmt.Sprintf("needs-work-%d", p),
|
||
})
|
||
|
||
// Create a completed provisioner job for the template version.
|
||
// This is needed because workspace builds copy the StorageMethod and FileID
|
||
// from the template version's import job to know which Terraform files to use.
|
||
templateVersionJob := dbgen.ProvisionerJob(b, db, ps, database.ProvisionerJob{
|
||
OrganizationID: org.ID,
|
||
InitiatorID: user.ID,
|
||
FileID: file.ID,
|
||
StorageMethod: database.ProvisionerStorageMethodFile,
|
||
Type: database.ProvisionerJobTypeTemplateVersionImport,
|
||
CompletedAt: sql.NullTime{Time: clock.Now(), Valid: true},
|
||
})
|
||
|
||
tv := dbgen.TemplateVersion(b, db, database.TemplateVersion{
|
||
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
|
||
OrganizationID: org.ID,
|
||
CreatedBy: user.ID,
|
||
JobID: templateVersionJob.ID,
|
||
})
|
||
|
||
dbgen.Preset(b, db, database.InsertPresetParams{
|
||
TemplateVersionID: tv.ID,
|
||
Name: "default",
|
||
DesiredInstances: sql.NullInt32{Int32: bc.desiredInstances, Valid: true},
|
||
})
|
||
|
||
// Make this the active version
|
||
err := db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
|
||
ID: template.ID,
|
||
ActiveVersionID: tv.ID,
|
||
})
|
||
require.NoError(b, err)
|
||
}
|
||
|
||
// Verify setup
|
||
allPresets, err := db.GetTemplatePresetsWithPrebuilds(ctx, uuid.NullUUID{})
|
||
require.NoError(b, err)
|
||
totalCount := bc.inactivePresetsCount + bc.activePresetsCount
|
||
require.Len(b, allPresets, totalCount, "total preset count should match")
|
||
|
||
// Count how many are actually active
|
||
activeCount := 0
|
||
for _, preset := range allPresets {
|
||
presetTemplate, err := db.GetTemplateByID(ctx, preset.TemplateID)
|
||
require.NoError(b, err)
|
||
if presetTemplate.ActiveVersionID == preset.TemplateVersionID {
|
||
activeCount++
|
||
}
|
||
}
|
||
require.Equal(b, bc.activePresetsCount, activeCount, "active preset count should match")
|
||
|
||
b.StartTimer()
|
||
|
||
// Measure reconciliation: should only process the active presets
|
||
_, err = controller.ReconcileAll(ctx)
|
||
require.NoError(b, err)
|
||
|
||
b.StopTimer()
|
||
}
|
||
})
|
||
}
|
||
}
|