mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
feat: add provisioner job queue wait time histogram and jobs enqueued counter (#21869)
This PR adds some metrics to help identify job enqueue rates and latencies. This work was initiated as a way to help reduce the cost of the observation/measurement itself for autostart scaletests, which impacts our ability to identify/reason about the load caused by autostart. See: https://github.com/coder/internal/issues/1209 I've extended the metrics here to account for regular user initiated builds, prebuilds, autostarts, etc. IMO there is still the question here of whether we want to include or need the `transition` label, which is only present on workspace builds. Including it does lead to an increase in cardinality, and in the case of the histogram (when not using native histograms) that's at least a few extra series for every bucket. We could remove the transition label there but keep it on the counter. Additionally, the histogram is currently observing latencies for other jobs, such as template builds/version imports, those do not have a transition type associated with them. Tested briefly in a workspace, can see metric values like the following: - `coderd_workspace_builds_enqueued_total{build_reason="autostart",provisioner_type="terraform",status="success",transition="start"} 1` - `coderd_provisioner_job_queue_wait_seconds_bucket{build_reason="autostart",job_type="workspace_build",provisioner_type="terraform",transition="start",le="0.025"} 1` --------- Signed-off-by: Callum Styan <callumstyan@gmail.com> Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
+8
-1
@@ -95,6 +95,7 @@ import (
|
|||||||
"github.com/coder/coder/v2/coderd/webpush"
|
"github.com/coder/coder/v2/coderd/webpush"
|
||||||
"github.com/coder/coder/v2/coderd/workspaceapps/appurl"
|
"github.com/coder/coder/v2/coderd/workspaceapps/appurl"
|
||||||
"github.com/coder/coder/v2/coderd/workspacestats"
|
"github.com/coder/coder/v2/coderd/workspacestats"
|
||||||
|
"github.com/coder/coder/v2/coderd/wsbuilder"
|
||||||
"github.com/coder/coder/v2/codersdk"
|
"github.com/coder/coder/v2/codersdk"
|
||||||
"github.com/coder/coder/v2/codersdk/drpcsdk"
|
"github.com/coder/coder/v2/codersdk/drpcsdk"
|
||||||
"github.com/coder/coder/v2/cryptorand"
|
"github.com/coder/coder/v2/cryptorand"
|
||||||
@@ -935,6 +936,12 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
|
|||||||
options.StatsBatcher = batcher
|
options.StatsBatcher = batcher
|
||||||
defer closeBatcher()
|
defer closeBatcher()
|
||||||
|
|
||||||
|
wsBuilderMetrics, err := wsbuilder.NewMetrics(options.PrometheusRegistry)
|
||||||
|
if err != nil {
|
||||||
|
return xerrors.Errorf("failed to register workspace builder metrics: %w", err)
|
||||||
|
}
|
||||||
|
options.WorkspaceBuilderMetrics = wsBuilderMetrics
|
||||||
|
|
||||||
// Manage notifications.
|
// Manage notifications.
|
||||||
var (
|
var (
|
||||||
notificationsCfg = options.DeploymentValues.Notifications
|
notificationsCfg = options.DeploymentValues.Notifications
|
||||||
@@ -1118,7 +1125,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
|
|||||||
autobuildTicker := time.NewTicker(vals.AutobuildPollInterval.Value())
|
autobuildTicker := time.NewTicker(vals.AutobuildPollInterval.Value())
|
||||||
defer autobuildTicker.Stop()
|
defer autobuildTicker.Stop()
|
||||||
autobuildExecutor := autobuild.NewExecutor(
|
autobuildExecutor := autobuild.NewExecutor(
|
||||||
ctx, options.Database, options.Pubsub, coderAPI.FileCache, options.PrometheusRegistry, coderAPI.TemplateScheduleStore, &coderAPI.Auditor, coderAPI.AccessControlStore, coderAPI.BuildUsageChecker, logger, autobuildTicker.C, options.NotificationsEnqueuer, coderAPI.Experiments)
|
ctx, options.Database, options.Pubsub, coderAPI.FileCache, options.PrometheusRegistry, coderAPI.TemplateScheduleStore, &coderAPI.Auditor, coderAPI.AccessControlStore, coderAPI.BuildUsageChecker, logger, autobuildTicker.C, options.NotificationsEnqueuer, coderAPI.Experiments, coderAPI.WorkspaceBuilderMetrics)
|
||||||
autobuildExecutor.Run()
|
autobuildExecutor.Run()
|
||||||
|
|
||||||
jobReaperTicker := time.NewTicker(vals.JobReaperDetectorInterval.Value())
|
jobReaperTicker := time.NewTicker(vals.JobReaperDetectorInterval.Value())
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ type Executor struct {
|
|||||||
notificationsEnqueuer notifications.Enqueuer
|
notificationsEnqueuer notifications.Enqueuer
|
||||||
reg prometheus.Registerer
|
reg prometheus.Registerer
|
||||||
experiments codersdk.Experiments
|
experiments codersdk.Experiments
|
||||||
|
workspaceBuilderMetrics *wsbuilder.Metrics
|
||||||
|
|
||||||
metrics executorMetrics
|
metrics executorMetrics
|
||||||
}
|
}
|
||||||
@@ -67,7 +68,7 @@ type Stats struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewExecutor returns a new autobuild executor.
|
// NewExecutor returns a new autobuild executor.
|
||||||
func NewExecutor(ctx context.Context, db database.Store, ps pubsub.Pubsub, fc *files.Cache, reg prometheus.Registerer, tss *atomic.Pointer[schedule.TemplateScheduleStore], auditor *atomic.Pointer[audit.Auditor], acs *atomic.Pointer[dbauthz.AccessControlStore], buildUsageChecker *atomic.Pointer[wsbuilder.UsageChecker], log slog.Logger, tick <-chan time.Time, enqueuer notifications.Enqueuer, exp codersdk.Experiments) *Executor {
|
func NewExecutor(ctx context.Context, db database.Store, ps pubsub.Pubsub, fc *files.Cache, reg prometheus.Registerer, tss *atomic.Pointer[schedule.TemplateScheduleStore], auditor *atomic.Pointer[audit.Auditor], acs *atomic.Pointer[dbauthz.AccessControlStore], buildUsageChecker *atomic.Pointer[wsbuilder.UsageChecker], log slog.Logger, tick <-chan time.Time, enqueuer notifications.Enqueuer, exp codersdk.Experiments, workspaceBuilderMetrics *wsbuilder.Metrics) *Executor {
|
||||||
factory := promauto.With(reg)
|
factory := promauto.With(reg)
|
||||||
le := &Executor{
|
le := &Executor{
|
||||||
//nolint:gocritic // Autostart has a limited set of permissions.
|
//nolint:gocritic // Autostart has a limited set of permissions.
|
||||||
@@ -84,6 +85,7 @@ func NewExecutor(ctx context.Context, db database.Store, ps pubsub.Pubsub, fc *f
|
|||||||
notificationsEnqueuer: enqueuer,
|
notificationsEnqueuer: enqueuer,
|
||||||
reg: reg,
|
reg: reg,
|
||||||
experiments: exp,
|
experiments: exp,
|
||||||
|
workspaceBuilderMetrics: workspaceBuilderMetrics,
|
||||||
metrics: executorMetrics{
|
metrics: executorMetrics{
|
||||||
autobuildExecutionDuration: factory.NewHistogram(prometheus.HistogramOpts{
|
autobuildExecutionDuration: factory.NewHistogram(prometheus.HistogramOpts{
|
||||||
Namespace: "coderd",
|
Namespace: "coderd",
|
||||||
@@ -335,7 +337,8 @@ func (e *Executor) runOnce(t time.Time) Stats {
|
|||||||
SetLastWorkspaceBuildInTx(&latestBuild).
|
SetLastWorkspaceBuildInTx(&latestBuild).
|
||||||
SetLastWorkspaceBuildJobInTx(&latestJob).
|
SetLastWorkspaceBuildJobInTx(&latestJob).
|
||||||
Experiments(e.experiments).
|
Experiments(e.experiments).
|
||||||
Reason(reason)
|
Reason(reason).
|
||||||
|
BuildMetrics(e.workspaceBuilderMetrics)
|
||||||
log.Debug(e.ctx, "auto building workspace", slog.F("transition", nextTransition))
|
log.Debug(e.ctx, "auto building workspace", slog.F("transition", nextTransition))
|
||||||
if nextTransition == database.WorkspaceTransitionStart &&
|
if nextTransition == database.WorkspaceTransitionStart &&
|
||||||
useActiveVersion(accessControl, ws) {
|
useActiveVersion(accessControl, ws) {
|
||||||
|
|||||||
@@ -245,6 +245,7 @@ type Options struct {
|
|||||||
MetadataBatcherOptions []metadatabatcher.Option
|
MetadataBatcherOptions []metadatabatcher.Option
|
||||||
|
|
||||||
ProvisionerdServerMetrics *provisionerdserver.Metrics
|
ProvisionerdServerMetrics *provisionerdserver.Metrics
|
||||||
|
WorkspaceBuilderMetrics *wsbuilder.Metrics
|
||||||
|
|
||||||
// WorkspaceAppAuditSessionTimeout allows changing the timeout for audit
|
// WorkspaceAppAuditSessionTimeout allows changing the timeout for audit
|
||||||
// sessions. Raising or lowering this value will directly affect the write
|
// sessions. Raising or lowering this value will directly affect the write
|
||||||
|
|||||||
@@ -191,6 +191,7 @@ type Options struct {
|
|||||||
TelemetryReporter telemetry.Reporter
|
TelemetryReporter telemetry.Reporter
|
||||||
|
|
||||||
ProvisionerdServerMetrics *provisionerdserver.Metrics
|
ProvisionerdServerMetrics *provisionerdserver.Metrics
|
||||||
|
WorkspaceBuilderMetrics *wsbuilder.Metrics
|
||||||
UsageInserter usage.Inserter
|
UsageInserter usage.Inserter
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -399,6 +400,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
|
|||||||
options.AutobuildTicker,
|
options.AutobuildTicker,
|
||||||
options.NotificationsEnqueuer,
|
options.NotificationsEnqueuer,
|
||||||
experiments,
|
experiments,
|
||||||
|
options.WorkspaceBuilderMetrics,
|
||||||
).WithStatsChannel(options.AutobuildStats)
|
).WithStatsChannel(options.AutobuildStats)
|
||||||
|
|
||||||
lifecycleExecutor.Run()
|
lifecycleExecutor.Run()
|
||||||
@@ -620,6 +622,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
|
|||||||
AppEncryptionKeyCache: options.APIKeyEncryptionCache,
|
AppEncryptionKeyCache: options.APIKeyEncryptionCache,
|
||||||
OIDCConvertKeyCache: options.OIDCConvertKeyCache,
|
OIDCConvertKeyCache: options.OIDCConvertKeyCache,
|
||||||
ProvisionerdServerMetrics: options.ProvisionerdServerMetrics,
|
ProvisionerdServerMetrics: options.ProvisionerdServerMetrics,
|
||||||
|
WorkspaceBuilderMetrics: options.WorkspaceBuilderMetrics,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ type Metrics struct {
|
|||||||
logger slog.Logger
|
logger slog.Logger
|
||||||
workspaceCreationTimings *prometheus.HistogramVec
|
workspaceCreationTimings *prometheus.HistogramVec
|
||||||
workspaceClaimTimings *prometheus.HistogramVec
|
workspaceClaimTimings *prometheus.HistogramVec
|
||||||
|
jobQueueWait *prometheus.HistogramVec
|
||||||
}
|
}
|
||||||
|
|
||||||
type WorkspaceTimingType int
|
type WorkspaceTimingType int
|
||||||
@@ -29,6 +30,12 @@ const (
|
|||||||
workspaceTypePrebuild = "prebuild"
|
workspaceTypePrebuild = "prebuild"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// BuildReasonPrebuild is the build_reason metric label value for prebuild
|
||||||
|
// operations. This is distinct from database.BuildReason values since prebuilds
|
||||||
|
// use BuildReasonInitiator in the database but we want to track them separately
|
||||||
|
// in metrics. This is also used as a label value by the metrics in wsbuilder.
|
||||||
|
const BuildReasonPrebuild = workspaceTypePrebuild
|
||||||
|
|
||||||
type WorkspaceTimingFlags struct {
|
type WorkspaceTimingFlags struct {
|
||||||
IsPrebuild bool
|
IsPrebuild bool
|
||||||
IsClaim bool
|
IsClaim bool
|
||||||
@@ -90,6 +97,30 @@ func NewMetrics(logger slog.Logger) *Metrics {
|
|||||||
NativeHistogramZeroThreshold: 0,
|
NativeHistogramZeroThreshold: 0,
|
||||||
NativeHistogramMaxZeroThreshold: 0,
|
NativeHistogramMaxZeroThreshold: 0,
|
||||||
}, []string{"organization_name", "template_name", "preset_name"}),
|
}, []string{"organization_name", "template_name", "preset_name"}),
|
||||||
|
jobQueueWait: prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||||
|
Namespace: "coderd",
|
||||||
|
Name: "provisioner_job_queue_wait_seconds",
|
||||||
|
Help: "Time from job creation to acquisition by a provisioner daemon.",
|
||||||
|
Buckets: []float64{
|
||||||
|
0.1, // 100ms
|
||||||
|
0.5, // 500ms
|
||||||
|
1, // 1s
|
||||||
|
5, // 5s
|
||||||
|
10, // 10s
|
||||||
|
30, // 30s
|
||||||
|
60, // 1m
|
||||||
|
120, // 2m
|
||||||
|
300, // 5m
|
||||||
|
600, // 10m
|
||||||
|
900, // 15m
|
||||||
|
1800, // 30m
|
||||||
|
},
|
||||||
|
NativeHistogramBucketFactor: 1.1,
|
||||||
|
NativeHistogramMaxBucketNumber: 100,
|
||||||
|
NativeHistogramMinResetDuration: time.Hour,
|
||||||
|
NativeHistogramZeroThreshold: 0,
|
||||||
|
NativeHistogramMaxZeroThreshold: 0,
|
||||||
|
}, []string{"provisioner_type", "job_type", "transition", "build_reason"}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -97,7 +128,10 @@ func (m *Metrics) Register(reg prometheus.Registerer) error {
|
|||||||
if err := reg.Register(m.workspaceCreationTimings); err != nil {
|
if err := reg.Register(m.workspaceCreationTimings); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return reg.Register(m.workspaceClaimTimings)
|
if err := reg.Register(m.workspaceClaimTimings); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return reg.Register(m.jobQueueWait)
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsTrackable returns true if the workspace build should be tracked in metrics.
|
// IsTrackable returns true if the workspace build should be tracked in metrics.
|
||||||
@@ -162,3 +196,9 @@ func (m *Metrics) UpdateWorkspaceTimingsMetrics(
|
|||||||
// Not a trackable build type (e.g. restart, stop, subsequent builds)
|
// Not a trackable build type (e.g. restart, stop, subsequent builds)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ObserveJobQueueWait records the time a provisioner job spent waiting in the queue.
|
||||||
|
// For non-workspace-build jobs, transition and buildReason should be empty strings.
|
||||||
|
func (m *Metrics) ObserveJobQueueWait(provisionerType, jobType, transition, buildReason string, waitSeconds float64) {
|
||||||
|
m.jobQueueWait.WithLabelValues(provisionerType, jobType, transition, buildReason).Observe(waitSeconds)
|
||||||
|
}
|
||||||
|
|||||||
@@ -478,6 +478,10 @@ func (s *server) acquireProtoJob(ctx context.Context, job database.ProvisionerJo
|
|||||||
TraceMetadata: jobTraceMetadata,
|
TraceMetadata: jobTraceMetadata,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// jobTransition and jobBuildReason are used for metrics; only set for workspace builds.
|
||||||
|
var jobTransition string
|
||||||
|
var jobBuildReason string
|
||||||
|
|
||||||
switch job.Type {
|
switch job.Type {
|
||||||
case database.ProvisionerJobTypeWorkspaceBuild:
|
case database.ProvisionerJobTypeWorkspaceBuild:
|
||||||
var input WorkspaceProvisionJob
|
var input WorkspaceProvisionJob
|
||||||
@@ -584,6 +588,15 @@ func (s *server) acquireProtoJob(ctx context.Context, job database.ProvisionerJo
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, failJob(fmt.Sprintf("convert workspace transition: %s", err))
|
return nil, failJob(fmt.Sprintf("convert workspace transition: %s", err))
|
||||||
}
|
}
|
||||||
|
jobTransition = string(workspaceBuild.Transition)
|
||||||
|
// Prebuilds use BuildReasonInitiator in the database but we want to
|
||||||
|
// track them separately in metrics. Check the initiator ID to detect
|
||||||
|
// prebuild jobs.
|
||||||
|
if job.InitiatorID == database.PrebuildsSystemUserID {
|
||||||
|
jobBuildReason = BuildReasonPrebuild
|
||||||
|
} else {
|
||||||
|
jobBuildReason = string(workspaceBuild.Reason)
|
||||||
|
}
|
||||||
|
|
||||||
// A previous workspace build exists
|
// A previous workspace build exists
|
||||||
var lastWorkspaceBuildParameters []database.WorkspaceBuildParameter
|
var lastWorkspaceBuildParameters []database.WorkspaceBuildParameter
|
||||||
@@ -825,6 +838,12 @@ func (s *server) acquireProtoJob(ctx context.Context, job database.ProvisionerJo
|
|||||||
return nil, failJob(fmt.Sprintf("payload was too big: %d > %d", protobuf.Size(protoJob), drpcsdk.MaxMessageSize))
|
return nil, failJob(fmt.Sprintf("payload was too big: %d > %d", protobuf.Size(protoJob), drpcsdk.MaxMessageSize))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Record the time the job spent waiting in the queue.
|
||||||
|
if s.metrics != nil && job.StartedAt.Valid && job.Provisioner.Valid() {
|
||||||
|
queueWaitSeconds := job.StartedAt.Time.Sub(job.CreatedAt).Seconds()
|
||||||
|
s.metrics.ObserveJobQueueWait(string(job.Provisioner), string(job.Type), jobTransition, jobBuildReason, queueWaitSeconds)
|
||||||
|
}
|
||||||
|
|
||||||
return protoJob, err
|
return protoJob, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -382,7 +382,8 @@ func (api *API) postWorkspaceBuildsInternal(
|
|||||||
LogLevel(string(createBuild.LogLevel)).
|
LogLevel(string(createBuild.LogLevel)).
|
||||||
DeploymentValues(api.Options.DeploymentValues).
|
DeploymentValues(api.Options.DeploymentValues).
|
||||||
Experiments(api.Experiments).
|
Experiments(api.Experiments).
|
||||||
TemplateVersionPresetID(createBuild.TemplateVersionPresetID)
|
TemplateVersionPresetID(createBuild.TemplateVersionPresetID).
|
||||||
|
BuildMetrics(api.WorkspaceBuilderMetrics)
|
||||||
|
|
||||||
if (transition == database.WorkspaceTransitionStart || transition == database.WorkspaceTransitionStop) && createBuild.Reason != "" {
|
if (transition == database.WorkspaceTransitionStart || transition == database.WorkspaceTransitionStop) && createBuild.Reason != "" {
|
||||||
builder = builder.Reason(database.BuildReason(createBuild.Reason))
|
builder = builder.Reason(database.BuildReason(createBuild.Reason))
|
||||||
|
|||||||
@@ -787,7 +787,8 @@ func createWorkspace(
|
|||||||
ActiveVersion().
|
ActiveVersion().
|
||||||
Experiments(api.Experiments).
|
Experiments(api.Experiments).
|
||||||
DeploymentValues(api.DeploymentValues).
|
DeploymentValues(api.DeploymentValues).
|
||||||
RichParameterValues(req.RichParameterValues)
|
RichParameterValues(req.RichParameterValues).
|
||||||
|
BuildMetrics(api.WorkspaceBuilderMetrics)
|
||||||
if req.TemplateVersionID != uuid.Nil {
|
if req.TemplateVersionID != uuid.Nil {
|
||||||
builder = builder.VersionID(req.TemplateVersionID)
|
builder = builder.VersionID(req.TemplateVersionID)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
@@ -21,7 +22,9 @@ import (
|
|||||||
"github.com/coder/coder/v2/agent/agenttest"
|
"github.com/coder/coder/v2/agent/agenttest"
|
||||||
"github.com/coder/coder/v2/coderd"
|
"github.com/coder/coder/v2/coderd"
|
||||||
"github.com/coder/coder/v2/coderd/audit"
|
"github.com/coder/coder/v2/coderd/audit"
|
||||||
|
"github.com/coder/coder/v2/coderd/autobuild"
|
||||||
"github.com/coder/coder/v2/coderd/coderdtest"
|
"github.com/coder/coder/v2/coderd/coderdtest"
|
||||||
|
"github.com/coder/coder/v2/coderd/coderdtest/promhelp"
|
||||||
"github.com/coder/coder/v2/coderd/database"
|
"github.com/coder/coder/v2/coderd/database"
|
||||||
"github.com/coder/coder/v2/coderd/database/dbauthz"
|
"github.com/coder/coder/v2/coderd/database/dbauthz"
|
||||||
"github.com/coder/coder/v2/coderd/database/dbfake"
|
"github.com/coder/coder/v2/coderd/database/dbfake"
|
||||||
@@ -30,6 +33,7 @@ import (
|
|||||||
"github.com/coder/coder/v2/coderd/database/dbtime"
|
"github.com/coder/coder/v2/coderd/database/dbtime"
|
||||||
"github.com/coder/coder/v2/coderd/notifications"
|
"github.com/coder/coder/v2/coderd/notifications"
|
||||||
"github.com/coder/coder/v2/coderd/notifications/notificationstest"
|
"github.com/coder/coder/v2/coderd/notifications/notificationstest"
|
||||||
|
"github.com/coder/coder/v2/coderd/provisionerdserver"
|
||||||
"github.com/coder/coder/v2/coderd/rbac"
|
"github.com/coder/coder/v2/coderd/rbac"
|
||||||
"github.com/coder/coder/v2/coderd/rbac/policy"
|
"github.com/coder/coder/v2/coderd/rbac/policy"
|
||||||
"github.com/coder/coder/v2/coderd/render"
|
"github.com/coder/coder/v2/coderd/render"
|
||||||
@@ -37,6 +41,7 @@ import (
|
|||||||
"github.com/coder/coder/v2/coderd/schedule/cron"
|
"github.com/coder/coder/v2/coderd/schedule/cron"
|
||||||
"github.com/coder/coder/v2/coderd/util/ptr"
|
"github.com/coder/coder/v2/coderd/util/ptr"
|
||||||
"github.com/coder/coder/v2/coderd/util/slice"
|
"github.com/coder/coder/v2/coderd/util/slice"
|
||||||
|
"github.com/coder/coder/v2/coderd/wsbuilder"
|
||||||
"github.com/coder/coder/v2/codersdk"
|
"github.com/coder/coder/v2/codersdk"
|
||||||
"github.com/coder/coder/v2/cryptorand"
|
"github.com/coder/coder/v2/cryptorand"
|
||||||
"github.com/coder/coder/v2/provisioner/echo"
|
"github.com/coder/coder/v2/provisioner/echo"
|
||||||
@@ -5901,3 +5906,135 @@ func TestWorkspaceCreateWithImplicitPreset(t *testing.T) {
|
|||||||
require.Equal(t, preset2ID, *ws2.LatestBuild.TemplateVersionPresetID)
|
require.Equal(t, preset2ID, *ws2.LatestBuild.TemplateVersionPresetID)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestProvisionerJobQueueWaitMetric(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
logger := testutil.Logger(t)
|
||||||
|
reg := prometheus.NewRegistry()
|
||||||
|
metrics := provisionerdserver.NewMetrics(logger)
|
||||||
|
err := metrics.Register(reg)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
client := coderdtest.New(t, &coderdtest.Options{
|
||||||
|
IncludeProvisionerDaemon: true,
|
||||||
|
ProvisionerdServerMetrics: metrics,
|
||||||
|
})
|
||||||
|
user := coderdtest.CreateFirstUser(t, client)
|
||||||
|
|
||||||
|
// Create a template version - this triggers a template_version_import job.
|
||||||
|
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, nil)
|
||||||
|
coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
|
||||||
|
|
||||||
|
// Check that the queue wait metric was recorded for the template_version_import job.
|
||||||
|
importMetric := promhelp.MetricValue(t, reg, "coderd_provisioner_job_queue_wait_seconds", prometheus.Labels{
|
||||||
|
"provisioner_type": string(database.ProvisionerTypeEcho),
|
||||||
|
"job_type": string(database.ProvisionerJobTypeTemplateVersionImport),
|
||||||
|
"transition": "",
|
||||||
|
"build_reason": "",
|
||||||
|
})
|
||||||
|
require.NotNil(t, importMetric, "import job metric should be recorded")
|
||||||
|
importHistogram := importMetric.GetHistogram()
|
||||||
|
require.NotNil(t, importHistogram)
|
||||||
|
require.Equal(t, uint64(1), importHistogram.GetSampleCount(), "import job should have 1 sample")
|
||||||
|
require.Greater(t, importHistogram.GetSampleSum(), 0.0, "import job queue wait should be non-zero")
|
||||||
|
|
||||||
|
// Create a template and workspace - this triggers a workspace_build job.
|
||||||
|
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
|
||||||
|
workspace := coderdtest.CreateWorkspace(t, client, template.ID)
|
||||||
|
coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID)
|
||||||
|
|
||||||
|
// Check that the queue wait metric was recorded for the workspace_build job.
|
||||||
|
buildMetric := promhelp.MetricValue(t, reg, "coderd_provisioner_job_queue_wait_seconds", prometheus.Labels{
|
||||||
|
"provisioner_type": string(database.ProvisionerTypeEcho),
|
||||||
|
"job_type": string(database.ProvisionerJobTypeWorkspaceBuild),
|
||||||
|
"transition": string(database.WorkspaceTransitionStart),
|
||||||
|
"build_reason": string(database.BuildReasonInitiator),
|
||||||
|
})
|
||||||
|
require.NotNil(t, buildMetric, "workspace build job metric should be recorded")
|
||||||
|
buildHistogram := buildMetric.GetHistogram()
|
||||||
|
require.NotNil(t, buildHistogram)
|
||||||
|
require.Equal(t, uint64(1), buildHistogram.GetSampleCount(), "workspace build job should have 1 sample")
|
||||||
|
require.Greater(t, buildHistogram.GetSampleSum(), 0.0, "workspace build job queue wait should be non-zero")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWorkspaceBuildsEnqueuedMetric(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
var (
|
||||||
|
logger = testutil.Logger(t)
|
||||||
|
reg = prometheus.NewRegistry()
|
||||||
|
metrics = provisionerdserver.NewMetrics(logger)
|
||||||
|
|
||||||
|
sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *")
|
||||||
|
tickCh = make(chan time.Time)
|
||||||
|
statsCh = make(chan autobuild.Stats)
|
||||||
|
)
|
||||||
|
|
||||||
|
err := metrics.Register(reg)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
wsBuilderMetrics, err := wsbuilder.NewMetrics(reg)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
client, db := coderdtest.NewWithDatabase(t, &coderdtest.Options{
|
||||||
|
IncludeProvisionerDaemon: true,
|
||||||
|
ProvisionerdServerMetrics: metrics,
|
||||||
|
WorkspaceBuilderMetrics: wsBuilderMetrics,
|
||||||
|
AutobuildTicker: tickCh,
|
||||||
|
AutobuildStats: statsCh,
|
||||||
|
})
|
||||||
|
user := coderdtest.CreateFirstUser(t, client)
|
||||||
|
|
||||||
|
// Create a template and workspace with autostart schedule.
|
||||||
|
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, nil)
|
||||||
|
coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
|
||||||
|
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
|
||||||
|
workspace := coderdtest.CreateWorkspace(t, client, template.ID, func(cwr *codersdk.CreateWorkspaceRequest) {
|
||||||
|
cwr.AutostartSchedule = ptr.Ref(sched.String())
|
||||||
|
})
|
||||||
|
coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID)
|
||||||
|
|
||||||
|
// Stop the workspace to prepare for autostart.
|
||||||
|
workspace = coderdtest.MustTransitionWorkspace(t, client, workspace.ID, codersdk.WorkspaceTransitionStart, codersdk.WorkspaceTransitionStop)
|
||||||
|
|
||||||
|
// Trigger an autostart build via the autobuild ticker. This verifies that
|
||||||
|
// autostart builds are recorded with build_reason="autostart".
|
||||||
|
p, err := coderdtest.GetProvisionerForTags(db, time.Now(), workspace.OrganizationID, map[string]string{})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
tickTime := sched.Next(workspace.LatestBuild.CreatedAt)
|
||||||
|
coderdtest.UpdateProvisionerLastSeenAt(t, db, p.ID, tickTime)
|
||||||
|
tickCh <- tickTime
|
||||||
|
close(tickCh)
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Wait for the autostart to complete.
|
||||||
|
stats := <-statsCh
|
||||||
|
require.Len(t, stats.Errors, 0)
|
||||||
|
require.Len(t, stats.Transitions, 1)
|
||||||
|
require.Contains(t, stats.Transitions, workspace.ID)
|
||||||
|
require.Equal(t, database.WorkspaceTransitionStart, stats.Transitions[workspace.ID])
|
||||||
|
|
||||||
|
// Verify the workspace was autostarted.
|
||||||
|
workspace = coderdtest.MustWorkspace(t, client, workspace.ID)
|
||||||
|
coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID)
|
||||||
|
require.Equal(t, codersdk.BuildReasonAutostart, workspace.LatestBuild.Reason)
|
||||||
|
|
||||||
|
// Now check the autostart metric was recorded.
|
||||||
|
autostartCount := promhelp.CounterValue(t, reg, "coderd_workspace_builds_enqueued_total", prometheus.Labels{
|
||||||
|
"provisioner_type": string(database.ProvisionerTypeEcho),
|
||||||
|
"build_reason": string(database.BuildReasonAutostart),
|
||||||
|
"transition": string(database.WorkspaceTransitionStart),
|
||||||
|
"status": wsbuilder.BuildStatusSuccess,
|
||||||
|
})
|
||||||
|
require.Equal(t, 1, autostartCount, "autostart should record 1 enqueue with build_reason=autostart")
|
||||||
|
}
|
||||||
|
|
||||||
|
func mustSchedule(t *testing.T, s string) *cron.Schedule {
|
||||||
|
t.Helper()
|
||||||
|
sched, err := cron.Weekly(s)
|
||||||
|
require.NoError(t, err)
|
||||||
|
return sched
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
package wsbuilder
|
||||||
|
|
||||||
|
import "github.com/prometheus/client_golang/prometheus"
|
||||||
|
|
||||||
|
// Metrics holds metrics related to workspace build creation.
|
||||||
|
type Metrics struct {
|
||||||
|
// workspaceBuildsEnqueued counts workspace build enqueue attempts, labeled
// by provisioner_type, build_reason, transition and status.
workspaceBuildsEnqueued *prometheus.CounterVec
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildStatusSuccess is the "status" metric label value recorded when a
// workspace build was enqueued without error.
const BuildStatusSuccess = "success"

// BuildStatusFailed is the "status" metric label value recorded when enqueuing
// a workspace build returned an error.
const BuildStatusFailed = "failed"
|
||||||
|
|
||||||
|
func NewMetrics(reg prometheus.Registerer) (*Metrics, error) {
|
||||||
|
m := &Metrics{
|
||||||
|
workspaceBuildsEnqueued: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||||
|
Namespace: "coderd",
|
||||||
|
Name: "workspace_builds_enqueued_total",
|
||||||
|
Help: "Total number of workspace build enqueue attempts.",
|
||||||
|
}, []string{"provisioner_type", "build_reason", "transition", "status"}),
|
||||||
|
}
|
||||||
|
|
||||||
|
if reg != nil {
|
||||||
|
if err := reg.Register(m.workspaceBuildsEnqueued); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordBuildEnqueued records a workspace build enqueue attempt. It determines
|
||||||
|
// the status based on whether an error occurred and increments the counter.
|
||||||
|
func (m *Metrics) RecordBuildEnqueued(provisionerType, buildReason, transition string, err error) {
|
||||||
|
status := BuildStatusSuccess
|
||||||
|
if err != nil {
|
||||||
|
status = BuildStatusFailed
|
||||||
|
}
|
||||||
|
m.workspaceBuildsEnqueued.WithLabelValues(provisionerType, buildReason, transition, status).Inc()
|
||||||
|
}
|
||||||
@@ -90,6 +90,8 @@ type Builder struct {
|
|||||||
|
|
||||||
prebuiltWorkspaceBuildStage sdkproto.PrebuiltWorkspaceBuildStage
|
prebuiltWorkspaceBuildStage sdkproto.PrebuiltWorkspaceBuildStage
|
||||||
verifyNoLegacyParametersOnce bool
|
verifyNoLegacyParametersOnce bool
|
||||||
|
|
||||||
|
buildMetrics *Metrics
|
||||||
}
|
}
|
||||||
|
|
||||||
type UsageChecker interface {
|
type UsageChecker interface {
|
||||||
@@ -253,6 +255,12 @@ func (b Builder) TemplateVersionPresetID(id uuid.UUID) Builder {
|
|||||||
return b
|
return b
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BuildMetrics returns a copy of the builder that records workspace build
// enqueue metrics to m. With a nil m, recordBuildMetrics records nothing.
func (b Builder) BuildMetrics(m *Metrics) Builder {
|
||||||
|
// nolint: revive
|
||||||
|
b.buildMetrics = m
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
type BuildError struct {
|
type BuildError struct {
|
||||||
// Status is a suitable HTTP status code
|
// Status is a suitable HTTP status code
|
||||||
Status int
|
Status int
|
||||||
@@ -313,11 +321,34 @@ func (b *Builder) Build(
|
|||||||
return err
|
return err
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
b.recordBuildMetrics(provisionerJob, err)
|
||||||
return nil, nil, nil, xerrors.Errorf("build tx: %w", err)
|
return nil, nil, nil, xerrors.Errorf("build tx: %w", err)
|
||||||
}
|
}
|
||||||
|
b.recordBuildMetrics(provisionerJob, nil)
|
||||||
return workspaceBuild, provisionerJob, provisionerDaemons, nil
|
return workspaceBuild, provisionerJob, provisionerDaemons, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// recordBuildMetrics records the workspace build enqueue metric if metrics are
|
||||||
|
// configured. It determines the appropriate build reason label, using "prebuild"
|
||||||
|
// for prebuild operations instead of the database reason.
|
||||||
|
func (b *Builder) recordBuildMetrics(job *database.ProvisionerJob, err error) {
|
||||||
|
if b.buildMetrics == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if job == nil || !job.Provisioner.Valid() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine the build reason for metrics. Prebuilds use BuildReasonInitiator
|
||||||
|
// in the database but we want to track them separately in metrics.
|
||||||
|
buildReason := string(b.reason)
|
||||||
|
if b.prebuiltWorkspaceBuildStage == sdkproto.PrebuiltWorkspaceBuildStage_CREATE {
|
||||||
|
buildReason = provisionerdserver.BuildReasonPrebuild
|
||||||
|
}
|
||||||
|
|
||||||
|
b.buildMetrics.RecordBuildEnqueued(string(job.Provisioner), buildReason, string(b.trans), err)
|
||||||
|
}
|
||||||
|
|
||||||
// buildTx contains the business logic of computing a new build. Attributes of the new database objects are computed
|
// buildTx contains the business logic of computing a new build. Attributes of the new database objects are computed
|
||||||
// in a functional style, rather than imperative, to emphasize the logic of how they are defined. A simple cache
|
// in a functional style, rather than imperative, to emphasize the logic of how they are defined. A simple cache
|
||||||
// of database-fetched objects is stored on the struct to ensure we only fetch things once, even if they are used in
|
// of database-fetched objects is stored on the struct to ensure we only fetch things once, even if they are used in
|
||||||
|
|||||||
@@ -158,11 +158,13 @@ deployment. They will always be available from the agent.
|
|||||||
| `coderd_oauth2_external_requests_rate_limit_used` | gauge | The number of requests made in this interval. | `name` `resource` |
|
| `coderd_oauth2_external_requests_rate_limit_used` | gauge | The number of requests made in this interval. | `name` `resource` |
|
||||||
| `coderd_oauth2_external_requests_total` | counter | The total number of api calls made to external oauth2 providers. 'status_code' will be 0 if the request failed with no response. | `name` `source` `status_code` |
|
| `coderd_oauth2_external_requests_total` | counter | The total number of api calls made to external oauth2 providers. 'status_code' will be 0 if the request failed with no response. | `name` `source` `status_code` |
|
||||||
| `coderd_prebuilt_workspace_claim_duration_seconds` | histogram | Time to claim a prebuilt workspace by organization, template, and preset. | `organization_name` `preset_name` `template_name` |
|
| `coderd_prebuilt_workspace_claim_duration_seconds` | histogram | Time to claim a prebuilt workspace by organization, template, and preset. | `organization_name` `preset_name` `template_name` |
|
||||||
|
| `coderd_provisioner_job_queue_wait_seconds` | histogram | Time from job creation to acquisition by a provisioner daemon. | `build_reason` `job_type` `provisioner_type` `transition` |
|
||||||
| `coderd_provisionerd_job_timings_seconds` | histogram | The provisioner job time duration in seconds. | `provisioner` `status` |
|
| `coderd_provisionerd_job_timings_seconds` | histogram | The provisioner job time duration in seconds. | `provisioner` `status` |
|
||||||
| `coderd_provisionerd_jobs_current` | gauge | The number of currently running provisioner jobs. | `provisioner` |
|
| `coderd_provisionerd_jobs_current` | gauge | The number of currently running provisioner jobs. | `provisioner` |
|
||||||
| `coderd_provisionerd_num_daemons` | gauge | The number of provisioner daemons. | |
|
| `coderd_provisionerd_num_daemons` | gauge | The number of provisioner daemons. | |
|
||||||
| `coderd_provisionerd_workspace_build_timings_seconds` | histogram | The time taken for a workspace to build. | `status` `template_name` `template_version` `workspace_transition` |
|
| `coderd_provisionerd_workspace_build_timings_seconds` | histogram | The time taken for a workspace to build. | `status` `template_name` `template_version` `workspace_transition` |
|
||||||
| `coderd_template_workspace_build_duration_seconds` | histogram | Duration from workspace build creation to agent ready, by template. | `is_prebuild` `organization_name` `status` `template_name` `transition` |
|
| `coderd_template_workspace_build_duration_seconds` | histogram | Duration from workspace build creation to agent ready, by template. | `is_prebuild` `organization_name` `status` `template_name` `transition` |
|
||||||
|
| `coderd_workspace_builds_enqueued_total` | counter | Total number of workspace build enqueue attempts. | `build_reason` `provisioner_type` `status` `transition` |
|
||||||
| `coderd_workspace_builds_total` | counter | The number of workspaces started, updated, or deleted. | `action` `owner_email` `status` `template_name` `template_version` `workspace_name` |
|
| `coderd_workspace_builds_total` | counter | The number of workspaces started, updated, or deleted. | `action` `owner_email` `status` `template_name` `template_version` `workspace_name` |
|
||||||
| `coderd_workspace_creation_duration_seconds` | histogram | Time to create a workspace by organization, template, preset, and type (regular or prebuild). | `organization_name` `preset_name` `template_name` `type` |
|
| `coderd_workspace_creation_duration_seconds` | histogram | Time to create a workspace by organization, template, preset, and type (regular or prebuild). | `organization_name` `preset_name` `template_name` `type` |
|
||||||
| `coderd_workspace_creation_total` | counter | Total regular (non-prebuilt) workspace creations by organization, template, and preset. | `organization_name` `preset_name` `template_name` |
|
| `coderd_workspace_creation_total` | counter | Total regular (non-prebuilt) workspace creations by organization, template, and preset. | `organization_name` `preset_name` `template_name` |
|
||||||
|
|||||||
@@ -370,6 +370,7 @@ func TestEnterpriseCreateWithPreset(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
||||||
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
||||||
@@ -483,6 +484,7 @@ func TestEnterpriseCreateWithPreset(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
||||||
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
||||||
|
|||||||
@@ -1331,6 +1331,7 @@ func (api *API) setupPrebuilds(featureEnabled bool) (agplprebuilds.Reconciliatio
|
|||||||
api.AGPL.BuildUsageChecker,
|
api.AGPL.BuildUsageChecker,
|
||||||
api.TracerProvider,
|
api.TracerProvider,
|
||||||
int(api.DeploymentValues.PostgresConnMaxOpen.Value()),
|
int(api.DeploymentValues.PostgresConnMaxOpen.Value()),
|
||||||
|
api.AGPL.WorkspaceBuilderMetrics,
|
||||||
)
|
)
|
||||||
return reconciler, prebuilds.NewEnterpriseClaimer()
|
return reconciler, prebuilds.NewEnterpriseClaimer()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -174,6 +174,7 @@ func TestClaimPrebuild(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
||||||
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
||||||
|
|||||||
@@ -204,6 +204,7 @@ func TestMetricsCollector(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
ctx := testutil.Context(t, testutil.WaitLong)
|
ctx := testutil.Context(t, testutil.WaitLong)
|
||||||
|
|
||||||
@@ -344,6 +345,7 @@ func TestMetricsCollector_DuplicateTemplateNames(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
ctx := testutil.Context(t, testutil.WaitLong)
|
ctx := testutil.Context(t, testutil.WaitLong)
|
||||||
|
|
||||||
@@ -500,6 +502,7 @@ func TestMetricsCollector_ReconciliationPausedMetric(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
ctx := testutil.Context(t, testutil.WaitLong)
|
ctx := testutil.Context(t, testutil.WaitLong)
|
||||||
|
|
||||||
@@ -537,6 +540,7 @@ func TestMetricsCollector_ReconciliationPausedMetric(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
ctx := testutil.Context(t, testutil.WaitLong)
|
ctx := testutil.Context(t, testutil.WaitLong)
|
||||||
|
|
||||||
@@ -574,6 +578,7 @@ func TestMetricsCollector_ReconciliationPausedMetric(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
ctx := testutil.Context(t, testutil.WaitLong)
|
ctx := testutil.Context(t, testutil.WaitLong)
|
||||||
|
|
||||||
|
|||||||
@@ -66,6 +66,7 @@ type StoreReconciler struct {
|
|||||||
metrics *MetricsCollector
|
metrics *MetricsCollector
|
||||||
// Operational metrics
|
// Operational metrics
|
||||||
reconciliationDuration prometheus.Histogram
|
reconciliationDuration prometheus.Histogram
|
||||||
|
workspaceBuilderMetrics *wsbuilder.Metrics
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ prebuilds.ReconciliationOrchestrator = &StoreReconciler{}
|
var _ prebuilds.ReconciliationOrchestrator = &StoreReconciler{}
|
||||||
@@ -99,6 +100,7 @@ func NewStoreReconciler(store database.Store,
|
|||||||
buildUsageChecker *atomic.Pointer[wsbuilder.UsageChecker],
|
buildUsageChecker *atomic.Pointer[wsbuilder.UsageChecker],
|
||||||
tracerProvider trace.TracerProvider,
|
tracerProvider trace.TracerProvider,
|
||||||
maxDBConnections int,
|
maxDBConnections int,
|
||||||
|
workspaceBuilderMetrics *wsbuilder.Metrics,
|
||||||
) *StoreReconciler {
|
) *StoreReconciler {
|
||||||
reconciliationConcurrency := calculateReconciliationConcurrency(maxDBConnections)
|
reconciliationConcurrency := calculateReconciliationConcurrency(maxDBConnections)
|
||||||
|
|
||||||
@@ -120,6 +122,7 @@ func NewStoreReconciler(store database.Store,
|
|||||||
done: make(chan struct{}, 1),
|
done: make(chan struct{}, 1),
|
||||||
provisionNotifyCh: make(chan database.ProvisionerJob, 10),
|
provisionNotifyCh: make(chan database.ProvisionerJob, 10),
|
||||||
reconciliationConcurrency: reconciliationConcurrency,
|
reconciliationConcurrency: reconciliationConcurrency,
|
||||||
|
workspaceBuilderMetrics: workspaceBuilderMetrics,
|
||||||
}
|
}
|
||||||
|
|
||||||
if registerer != nil {
|
if registerer != nil {
|
||||||
@@ -1052,7 +1055,8 @@ func (c *StoreReconciler) provision(
|
|||||||
builder := wsbuilder.New(workspace, transition, *c.buildUsageChecker.Load()).
|
builder := wsbuilder.New(workspace, transition, *c.buildUsageChecker.Load()).
|
||||||
Reason(database.BuildReasonInitiator).
|
Reason(database.BuildReasonInitiator).
|
||||||
Initiator(database.PrebuildsSystemUserID).
|
Initiator(database.PrebuildsSystemUserID).
|
||||||
MarkPrebuild()
|
MarkPrebuild().
|
||||||
|
BuildMetrics(c.workspaceBuilderMetrics)
|
||||||
|
|
||||||
if transition != database.WorkspaceTransitionDelete {
|
if transition != database.WorkspaceTransitionDelete {
|
||||||
// We don't specify the version for a delete transition,
|
// We don't specify the version for a delete transition,
|
||||||
|
|||||||
@@ -61,6 +61,7 @@ func TestNoReconciliationActionsIfNoPresets(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
// given a template version with no presets
|
// given a template version with no presets
|
||||||
@@ -112,6 +113,7 @@ func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
// given there are presets, but no prebuilds
|
// given there are presets, but no prebuilds
|
||||||
@@ -450,6 +452,7 @@ func (tc testCase) run(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
// Run the reconciliation multiple times to ensure idempotency
|
// Run the reconciliation multiple times to ensure idempotency
|
||||||
@@ -527,6 +530,7 @@ func TestMultiplePresetsPerTemplateVersion(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
ownerID := uuid.New()
|
ownerID := uuid.New()
|
||||||
@@ -658,6 +662,7 @@ func TestPrebuildScheduling(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
ownerID := uuid.New()
|
ownerID := uuid.New()
|
||||||
@@ -767,6 +772,7 @@ func TestInvalidPreset(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
ownerID := uuid.New()
|
ownerID := uuid.New()
|
||||||
@@ -837,6 +843,7 @@ func TestDeletionOfPrebuiltWorkspaceWithInvalidPreset(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
ownerID := uuid.New()
|
ownerID := uuid.New()
|
||||||
@@ -939,6 +946,7 @@ func TestSkippingHardLimitedPresets(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
// Set up test environment with a template, version, and preset.
|
// Set up test environment with a template, version, and preset.
|
||||||
@@ -1090,6 +1098,7 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
// Set up test environment with a template, version, and preset.
|
// Set up test environment with a template, version, and preset.
|
||||||
@@ -1291,6 +1300,7 @@ func TestRunLoop(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
ownerID := uuid.New()
|
ownerID := uuid.New()
|
||||||
@@ -1423,6 +1433,7 @@ func TestReconcilerLifecycle(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
// When: the reconciler is stopped (simulating the prebuilds feature being disabled)
|
// When: the reconciler is stopped (simulating the prebuilds feature being disabled)
|
||||||
@@ -1438,6 +1449,7 @@ func TestReconcilerLifecycle(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
// Gracefully stop the reconciliation loop
|
// Gracefully stop the reconciliation loop
|
||||||
@@ -1471,6 +1483,7 @@ func TestFailedBuildBackoff(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
// Given: an active template version with presets and prebuilds configured.
|
// Given: an active template version with presets and prebuilds configured.
|
||||||
@@ -1595,6 +1608,7 @@ func TestReconciliationLock(t *testing.T) {
|
|||||||
newNoopEnqueuer(),
|
newNoopEnqueuer(),
|
||||||
newNoopUsageCheckerPtr(), noop.NewTracerProvider(),
|
newNoopUsageCheckerPtr(), noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
reconciler.WithReconciliationLock(ctx, logger, func(_ context.Context, _ database.Store) error {
|
reconciler.WithReconciliationLock(ctx, logger, func(_ context.Context, _ database.Store) error {
|
||||||
lockObtained := mutex.TryLock()
|
lockObtained := mutex.TryLock()
|
||||||
@@ -1633,6 +1647,7 @@ func TestTrackResourceReplacement(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
// Given: a template admin to receive a notification.
|
// Given: a template admin to receive a notification.
|
||||||
@@ -1793,6 +1808,7 @@ func TestExpiredPrebuildsMultipleActions(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
// Set up test environment with a template, version, and preset
|
// Set up test environment with a template, version, and preset
|
||||||
@@ -2258,6 +2274,7 @@ func TestCancelPendingPrebuilds(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
owner := coderdtest.CreateFirstUser(t, client)
|
owner := coderdtest.CreateFirstUser(t, client)
|
||||||
|
|
||||||
@@ -2503,6 +2520,7 @@ func TestCancelPendingPrebuilds(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
owner := coderdtest.CreateFirstUser(t, client)
|
owner := coderdtest.CreateFirstUser(t, client)
|
||||||
|
|
||||||
@@ -2576,6 +2594,7 @@ func TestReconciliationStats(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
owner := coderdtest.CreateFirstUser(t, client)
|
owner := coderdtest.CreateFirstUser(t, client)
|
||||||
|
|
||||||
@@ -3066,6 +3085,7 @@ func TestReconciliationRespectsPauseSetting(t *testing.T) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
// Setup a template with a preset that should create prebuilds
|
// Setup a template with a preset that should create prebuilds
|
||||||
@@ -3172,6 +3192,7 @@ func BenchmarkReconcileAll_NoOps(b *testing.B) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
maxOpenConns,
|
maxOpenConns,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
org := dbgen.Organization(b, db, database.Organization{})
|
org := dbgen.Organization(b, db, database.Organization{})
|
||||||
@@ -3283,6 +3304,7 @@ func BenchmarkReconcileAll_ConnectionContention(b *testing.B) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
maxOpenConns,
|
maxOpenConns,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
// Create presets from active template versions that need reconciliation actions
|
// Create presets from active template versions that need reconciliation actions
|
||||||
@@ -3402,6 +3424,7 @@ func BenchmarkReconcileAll_Mix(b *testing.B) {
|
|||||||
newNoopUsageCheckerPtr(),
|
newNoopUsageCheckerPtr(),
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
maxOpenConns,
|
maxOpenConns,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
|
|
||||||
org := dbgen.Organization(b, db, database.Organization{})
|
org := dbgen.Organization(b, db, database.Organization{})
|
||||||
|
|||||||
@@ -1991,6 +1991,7 @@ func TestPrebuildsAutobuild(t *testing.T) {
|
|||||||
api.AGPL.BuildUsageChecker,
|
api.AGPL.BuildUsageChecker,
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
||||||
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
||||||
@@ -2115,6 +2116,7 @@ func TestPrebuildsAutobuild(t *testing.T) {
|
|||||||
api.AGPL.BuildUsageChecker,
|
api.AGPL.BuildUsageChecker,
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
||||||
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
||||||
@@ -2239,6 +2241,7 @@ func TestPrebuildsAutobuild(t *testing.T) {
|
|||||||
api.AGPL.BuildUsageChecker,
|
api.AGPL.BuildUsageChecker,
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
||||||
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
||||||
@@ -2385,6 +2388,7 @@ func TestPrebuildsAutobuild(t *testing.T) {
|
|||||||
api.AGPL.BuildUsageChecker,
|
api.AGPL.BuildUsageChecker,
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
||||||
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
||||||
@@ -2532,6 +2536,7 @@ func TestPrebuildsAutobuild(t *testing.T) {
|
|||||||
api.AGPL.BuildUsageChecker,
|
api.AGPL.BuildUsageChecker,
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
||||||
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
||||||
@@ -2979,6 +2984,7 @@ func TestWorkspaceProvisionerdServerMetrics(t *testing.T) {
|
|||||||
api.AGPL.BuildUsageChecker,
|
api.AGPL.BuildUsageChecker,
|
||||||
noop.NewTracerProvider(),
|
noop.NewTracerProvider(),
|
||||||
10,
|
10,
|
||||||
|
nil,
|
||||||
)
|
)
|
||||||
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer()
|
||||||
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
api.AGPL.PrebuildsClaimer.Store(&claimer)
|
||||||
|
|||||||
@@ -990,3 +990,13 @@ coderd_license_warnings 0
|
|||||||
# HELP coderd_license_errors The number of active license errors.
|
# HELP coderd_license_errors The number of active license errors.
|
||||||
# TYPE coderd_license_errors gauge
|
# TYPE coderd_license_errors gauge
|
||||||
coderd_license_errors 0
|
coderd_license_errors 0
|
||||||
|
# HELP coderd_provisioner_job_queue_wait_seconds Time from job creation to acquisition by a provisioner daemon.
|
||||||
|
# TYPE coderd_provisioner_job_queue_wait_seconds histogram
|
||||||
|
coderd_provisioner_job_queue_wait_seconds_bucket{build_reason="initiator",job_type="workspace_build",provisioner_type="terraform",transition="stop",le="300"} 1
|
||||||
|
coderd_provisioner_job_queue_wait_seconds_bucket{build_reason="initiator",job_type="workspace_build",provisioner_type="terraform",transition="stop",le="+Inf"} 1
|
||||||
|
coderd_provisioner_job_queue_wait_seconds_sum{build_reason="initiator",job_type="workspace_build",provisioner_type="terraform",transition="stop"} 0.01191
|
||||||
|
coderd_provisioner_job_queue_wait_seconds_count{build_reason="initiator",job_type="workspace_build",provisioner_type="terraform",transition="stop"} 1
|
||||||
|
# HELP coderd_workspace_builds_enqueued_total Total number of workspace build enqueue attempts.
|
||||||
|
# TYPE coderd_workspace_builds_enqueued_total counter
|
||||||
|
coderd_workspace_builds_enqueued_total{build_reason="dashboard",provisioner_type="terraform",status="success",transition="start"} 1
|
||||||
|
coderd_workspace_builds_enqueued_total{build_reason="initiator",provisioner_type="terraform",status="success",transition="stop"} 1
|
||||||
|
|||||||
Reference in New Issue
Block a user