fix(coderd): fix logic for reporting prebuilt workspace duration metric (#19641)

## Description

When creating a prebuilt workspace, both `flags.IsPrebuild` and
`flags.IsFirstBuild` are true. Previously, the logic rejected cases with
multiple flags, so `coderd_workspace_creation_duration_seconds` wasn’t
updated for prebuilt creations. This is the only valid scenario where
two flags can be true.

## Changes

* Fix logic to update `coderd_workspace_creation_duration_seconds`
metric for prebuilt workspaces.
* Add prebuild helper functions to coderdenttest (other prebuild tests
can reuse this).
* Update workspace's provisionerdmetric tests to include this metric.

Follow-up: https://github.com/coder/coder/pull/19503
Related to: https://github.com/coder/coder/issues/19528
This commit is contained in:
Susana Ferreira
2025-08-29 15:48:48 +01:00
committed by GitHub
parent 02ecf32afe
commit 353f5dedc1
3 changed files with 187 additions and 98 deletions
+8 -27
View File
@@ -100,25 +100,14 @@ func (m *Metrics) Register(reg prometheus.Registerer) error {
return reg.Register(m.workspaceClaimTimings)
}
func (f WorkspaceTimingFlags) count() int {
count := 0
if f.IsPrebuild {
count++
}
if f.IsClaim {
count++
}
if f.IsFirstBuild {
count++
}
return count
}
// getWorkspaceTimingType returns the type of the workspace build:
// - isPrebuild: if the workspace build corresponds to the creation of a prebuilt workspace
// - isClaim: if the workspace build corresponds to the claim of a prebuilt workspace
// - isWorkspaceFirstBuild: if the workspace build corresponds to the creation of a regular workspace
// (not created from the prebuild pool)
// getWorkspaceTimingType classifies a workspace build:
// - PrebuildCreation: creation of a prebuilt workspace
// - PrebuildClaim: claim of an existing prebuilt workspace
// - WorkspaceCreation: first build of a regular (non-prebuilt) workspace
//
// Note: order matters. Creating a prebuilt workspace is also a first build
// (IsPrebuild && IsFirstBuild). We check IsPrebuild before IsFirstBuild so
// prebuilds take precedence. This is the only case where two flags can be true.
func getWorkspaceTimingType(flags WorkspaceTimingFlags) WorkspaceTimingType {
switch {
case flags.IsPrebuild:
@@ -149,14 +138,6 @@ func (m *Metrics) UpdateWorkspaceTimingsMetrics(
"isClaim", flags.IsClaim,
"isWorkspaceFirstBuild", flags.IsFirstBuild)
if flags.count() > 1 {
m.logger.Warn(ctx, "invalid workspace timing flags",
"isPrebuild", flags.IsPrebuild,
"isClaim", flags.IsClaim,
"isWorkspaceFirstBuild", flags.IsFirstBuild)
return
}
workspaceTimingType := getWorkspaceTimingType(flags)
switch workspaceTimingType {
case WorkspaceCreation:
@@ -5,6 +5,7 @@ import (
"crypto/ed25519"
"crypto/rand"
"crypto/tls"
"database/sql"
"io"
"net/http"
"os/exec"
@@ -23,10 +24,13 @@ import (
"github.com/coder/coder/v2/coderd/coderdtest"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/pubsub"
"github.com/coder/coder/v2/coderd/prebuilds"
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/drpcsdk"
"github.com/coder/coder/v2/enterprise/coderd"
"github.com/coder/coder/v2/enterprise/coderd/license"
entprebuilds "github.com/coder/coder/v2/enterprise/coderd/prebuilds"
"github.com/coder/coder/v2/enterprise/dbcrypt"
"github.com/coder/coder/v2/provisioner/echo"
"github.com/coder/coder/v2/provisioner/terraform"
@@ -446,3 +450,98 @@ func newExternalProvisionerDaemon(t testing.TB, client *codersdk.Client, org uui
return closer
}
func GetRunningPrebuilds(
ctx context.Context,
t *testing.T,
db database.Store,
desiredPrebuilds int,
) []database.GetRunningPrebuiltWorkspacesRow {
t.Helper()
var runningPrebuilds []database.GetRunningPrebuiltWorkspacesRow
testutil.Eventually(ctx, t, func(context.Context) bool {
prebuiltWorkspaces, err := db.GetRunningPrebuiltWorkspaces(ctx)
assert.NoError(t, err, "failed to get running prebuilds")
for _, prebuild := range prebuiltWorkspaces {
runningPrebuilds = append(runningPrebuilds, prebuild)
agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, prebuild.ID)
assert.NoError(t, err, "failed to get agents")
// Manually mark all agents as ready since tests don't have real agent processes
// that would normally report their lifecycle state. Prebuilt workspaces are only
// eligible for claiming when their agents reach the "ready" state.
for _, agent := range agents {
err = db.UpdateWorkspaceAgentLifecycleStateByID(ctx, database.UpdateWorkspaceAgentLifecycleStateByIDParams{
ID: agent.ID,
LifecycleState: database.WorkspaceAgentLifecycleStateReady,
StartedAt: sql.NullTime{Time: time.Now().Add(time.Hour), Valid: true},
ReadyAt: sql.NullTime{Time: time.Now().Add(-1 * time.Hour), Valid: true},
})
assert.NoError(t, err, "failed to update agent")
}
}
t.Logf("found %d running prebuilds so far, want %d", len(runningPrebuilds), desiredPrebuilds)
return len(runningPrebuilds) == desiredPrebuilds
}, testutil.IntervalSlow, "found %d running prebuilds, expected %d", len(runningPrebuilds), desiredPrebuilds)
return runningPrebuilds
}
func MustRunReconciliationLoopForPreset(
ctx context.Context,
t *testing.T,
db database.Store,
reconciler *entprebuilds.StoreReconciler,
preset codersdk.Preset,
) []*prebuilds.ReconciliationActions {
t.Helper()
state, err := reconciler.SnapshotState(ctx, db)
require.NoError(t, err)
ps, err := state.FilterByPreset(preset.ID)
require.NoError(t, err)
require.NotNil(t, ps)
actions, err := reconciler.CalculateActions(ctx, *ps)
require.NoError(t, err)
require.NotNil(t, actions)
require.NoError(t, reconciler.ReconcilePreset(ctx, *ps))
return actions
}
func MustClaimPrebuild(
ctx context.Context,
t *testing.T,
client *codersdk.Client,
userClient *codersdk.Client,
username string,
version codersdk.TemplateVersion,
presetID uuid.UUID,
autostartSchedule ...string,
) codersdk.Workspace {
t.Helper()
var startSchedule string
if len(autostartSchedule) > 0 {
startSchedule = autostartSchedule[0]
}
workspaceName := strings.ReplaceAll(testutil.GetRandomName(t), "_", "-")
userWorkspace, err := userClient.CreateUserWorkspace(ctx, username, codersdk.CreateWorkspaceRequest{
TemplateVersionID: version.ID,
Name: workspaceName,
TemplateVersionPresetID: presetID,
AutostartSchedule: ptr.Ref(startSchedule),
})
require.NoError(t, err)
build := coderdtest.AwaitWorkspaceBuildJobCompleted(t, userClient, userWorkspace.LatestBuild.ID)
require.Equal(t, build.Job.Status, codersdk.ProvisionerJobSucceeded)
workspace := coderdtest.MustWorkspace(t, client, userWorkspace.ID)
require.Equal(t, codersdk.WorkspaceTransitionStart, workspace.LatestBuild.Transition)
return workspace
}
+80 -71
View File
@@ -2879,105 +2879,114 @@ func TestWorkspaceProvisionerdServerMetrics(t *testing.T) {
t.Parallel()
// Setup
log := testutil.Logger(t)
clock := quartz.NewMock(t)
ctx := testutil.Context(t, testutil.WaitSuperLong)
db, pb := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
logger := testutil.Logger(t)
reg := prometheus.NewRegistry()
provisionerdserverMetrics := provisionerdserver.NewMetrics(log)
provisionerdserverMetrics := provisionerdserver.NewMetrics(logger)
err := provisionerdserverMetrics.Register(reg)
require.NoError(t, err)
client, db, owner := coderdenttest.NewWithDatabase(t, &coderdenttest.Options{
client, _, api, owner := coderdenttest.NewWithAPI(t, &coderdenttest.Options{
Options: &coderdtest.Options{
Database: db,
Pubsub: pb,
IncludeProvisionerDaemon: true,
Clock: clock,
ProvisionerdServerMetrics: provisionerdserverMetrics,
},
LicenseOptions: &coderdenttest.LicenseOptions{
Features: license.Features{
codersdk.FeatureWorkspacePrebuilds: 1,
},
},
})
// Given: a template and a template version with a preset without prebuild instances
presetNoPrebuildID := uuid.New()
versionNoPrebuild := coderdtest.CreateTemplateVersion(t, client, owner.OrganizationID, nil)
_ = coderdtest.AwaitTemplateVersionJobCompleted(t, client, versionNoPrebuild.ID)
templateNoPrebuild := coderdtest.CreateTemplate(t, client, owner.OrganizationID, versionNoPrebuild.ID)
presetNoPrebuild := dbgen.Preset(t, db, database.InsertPresetParams{
ID: presetNoPrebuildID,
TemplateVersionID: versionNoPrebuild.ID,
})
// Given: a template and a template version with a preset with a prebuild instance
presetPrebuildID := uuid.New()
versionPrebuild := coderdtest.CreateTemplateVersion(t, client, owner.OrganizationID, nil)
_ = coderdtest.AwaitTemplateVersionJobCompleted(t, client, versionPrebuild.ID)
templatePrebuild := coderdtest.CreateTemplate(t, client, owner.OrganizationID, versionPrebuild.ID)
presetPrebuild := dbgen.Preset(t, db, database.InsertPresetParams{
ID: presetPrebuildID,
TemplateVersionID: versionPrebuild.ID,
DesiredInstances: sql.NullInt32{Int32: 1, Valid: true},
})
// Given: a prebuild workspace
wb := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OwnerID: database.PrebuildsSystemUserID,
TemplateID: templatePrebuild.ID,
}).Seed(database.WorkspaceBuild{
TemplateVersionID: versionPrebuild.ID,
TemplateVersionPresetID: uuid.NullUUID{
UUID: presetPrebuildID,
Valid: true,
},
}).WithAgent(func(agent []*proto.Agent) []*proto.Agent {
return agent
}).Do()
// Mark the prebuilt workspace's agent as ready so the prebuild can be claimed
// nolint:gocritic
ctx := dbauthz.AsSystemRestricted(testutil.Context(t, testutil.WaitLong))
agent, err := db.GetWorkspaceAgentAndLatestBuildByAuthToken(ctx, uuid.MustParse(wb.AgentToken))
require.NoError(t, err)
err = db.UpdateWorkspaceAgentLifecycleStateByID(ctx, database.UpdateWorkspaceAgentLifecycleStateByIDParams{
ID: agent.WorkspaceAgent.ID,
LifecycleState: database.WorkspaceAgentLifecycleStateReady,
})
require.NoError(t, err)
// Setup Prebuild reconciler
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
reconciler := prebuilds.NewStoreReconciler(
db, pb, cache,
codersdk.PrebuildsConfig{},
logger,
clock,
prometheus.NewRegistry(),
notifications.NewNoopEnqueuer(),
api.AGPL.BuildUsageChecker,
)
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer(db)
api.AGPL.PrebuildsClaimer.Store(&claimer)
organizationName, err := client.Organization(ctx, owner.OrganizationID)
require.NoError(t, err)
user, err := client.User(ctx, "testUser")
userClient, user := coderdtest.CreateAnotherUser(t, client, owner.OrganizationID, rbac.RoleMember())
// Setup template and template version with a preset with 1 prebuild instance
versionPrebuild := coderdtest.CreateTemplateVersion(t, client, owner.OrganizationID, templateWithAgentAndPresetsWithPrebuilds(1))
coderdtest.AwaitTemplateVersionJobCompleted(t, client, versionPrebuild.ID)
templatePrebuild := coderdtest.CreateTemplate(t, client, owner.OrganizationID, versionPrebuild.ID)
presetsPrebuild, err := client.TemplateVersionPresets(ctx, versionPrebuild.ID)
require.NoError(t, err)
require.Len(t, presetsPrebuild, 1)
// Setup template and template version with a preset without prebuild instances
versionNoPrebuild := coderdtest.CreateTemplateVersion(t, client, owner.OrganizationID, templateWithAgentAndPresetsWithPrebuilds(0))
coderdtest.AwaitTemplateVersionJobCompleted(t, client, versionNoPrebuild.ID)
templateNoPrebuild := coderdtest.CreateTemplate(t, client, owner.OrganizationID, versionNoPrebuild.ID)
presetsNoPrebuild, err := client.TemplateVersionPresets(ctx, versionNoPrebuild.ID)
require.NoError(t, err)
require.Len(t, presetsNoPrebuild, 1)
// Given: no histogram value for prebuilt workspaces creation
prebuildCreationMetric := promhelp.MetricValue(t, reg, "coderd_workspace_creation_duration_seconds", prometheus.Labels{
"organization_name": organizationName.Name,
"template_name": templatePrebuild.Name,
"preset_name": presetsPrebuild[0].Name,
"type": "prebuild",
})
require.Nil(t, prebuildCreationMetric)
// Given: reconciliation loop runs and starts prebuilt workspace
coderdenttest.MustRunReconciliationLoopForPreset(ctx, t, db, reconciler, presetsPrebuild[0])
runningPrebuilds := coderdenttest.GetRunningPrebuilds(ctx, t, db, 1)
require.Len(t, runningPrebuilds, 1)
// Then: the histogram value for prebuilt workspace creation should be updated
prebuildCreationHistogram := promhelp.HistogramValue(t, reg, "coderd_workspace_creation_duration_seconds", prometheus.Labels{
"organization_name": organizationName.Name,
"template_name": templatePrebuild.Name,
"preset_name": presetsPrebuild[0].Name,
"type": "prebuild",
})
require.NotNil(t, prebuildCreationHistogram)
require.Equal(t, uint64(1), prebuildCreationHistogram.GetSampleCount())
// Given: a running prebuilt workspace, ready to be claimed
prebuild := coderdtest.MustWorkspace(t, client, runningPrebuilds[0].ID)
require.Equal(t, codersdk.WorkspaceTransitionStart, prebuild.LatestBuild.Transition)
require.Nil(t, prebuild.DormantAt)
require.Nil(t, prebuild.DeletingAt)
// Given: no histogram value for prebuilt workspaces claim
prebuiltWorkspaceHistogramMetric := promhelp.MetricValue(t, reg, "coderd_prebuilt_workspace_claim_duration_seconds", prometheus.Labels{
prebuildClaimMetric := promhelp.MetricValue(t, reg, "coderd_prebuilt_workspace_claim_duration_seconds", prometheus.Labels{
"organization_name": organizationName.Name,
"template_name": templatePrebuild.Name,
"preset_name": presetPrebuild.Name,
"preset_name": presetsPrebuild[0].Name,
})
require.Nil(t, prebuiltWorkspaceHistogramMetric)
require.Nil(t, prebuildClaimMetric)
// Given: the prebuilt workspace is claimed by a user
claimedWorkspace, err := client.CreateUserWorkspace(ctx, user.ID.String(), codersdk.CreateWorkspaceRequest{
TemplateVersionID: versionPrebuild.ID,
TemplateVersionPresetID: presetPrebuildID,
Name: coderdtest.RandomUsername(t),
})
require.NoError(t, err)
coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, claimedWorkspace.LatestBuild.ID)
require.Equal(t, wb.Workspace.ID, claimedWorkspace.ID)
workspace := coderdenttest.MustClaimPrebuild(ctx, t, client, userClient, user.Username, versionPrebuild, presetsPrebuild[0].ID)
require.Equal(t, prebuild.ID, workspace.ID)
// Then: the histogram value for prebuilt workspace claim should be updated
prebuiltWorkspaceHistogram := promhelp.HistogramValue(t, reg, "coderd_prebuilt_workspace_claim_duration_seconds", prometheus.Labels{
prebuildClaimHistogram := promhelp.HistogramValue(t, reg, "coderd_prebuilt_workspace_claim_duration_seconds", prometheus.Labels{
"organization_name": organizationName.Name,
"template_name": templatePrebuild.Name,
"preset_name": presetPrebuild.Name,
"preset_name": presetsPrebuild[0].Name,
})
require.NotNil(t, prebuiltWorkspaceHistogram)
require.Equal(t, uint64(1), prebuiltWorkspaceHistogram.GetSampleCount())
require.NotNil(t, prebuildClaimHistogram)
require.Equal(t, uint64(1), prebuildClaimHistogram.GetSampleCount())
// Given: no histogram value for regular workspaces creation
regularWorkspaceHistogramMetric := promhelp.MetricValue(t, reg, "coderd_workspace_creation_duration_seconds", prometheus.Labels{
"organization_name": organizationName.Name,
"template_name": templateNoPrebuild.Name,
"preset_name": presetNoPrebuild.Name,
"preset_name": presetsNoPrebuild[0].Name,
"type": "regular",
})
require.Nil(t, regularWorkspaceHistogramMetric)
@@ -2985,7 +2994,7 @@ func TestWorkspaceProvisionerdServerMetrics(t *testing.T) {
// Given: a user creates a regular workspace (without prebuild pool)
regularWorkspace, err := client.CreateUserWorkspace(ctx, user.ID.String(), codersdk.CreateWorkspaceRequest{
TemplateVersionID: versionNoPrebuild.ID,
TemplateVersionPresetID: presetNoPrebuildID,
TemplateVersionPresetID: presetsNoPrebuild[0].ID,
Name: coderdtest.RandomUsername(t),
})
require.NoError(t, err)
@@ -2995,7 +3004,7 @@ func TestWorkspaceProvisionerdServerMetrics(t *testing.T) {
regularWorkspaceHistogram := promhelp.HistogramValue(t, reg, "coderd_workspace_creation_duration_seconds", prometheus.Labels{
"organization_name": organizationName.Name,
"template_name": templateNoPrebuild.Name,
"preset_name": presetNoPrebuild.Name,
"preset_name": presetsNoPrebuild[0].Name,
"type": "regular",
})
require.NotNil(t, regularWorkspaceHistogram)