mirror of
https://github.com/coder/coder.git
synced 2026-06-03 04:58:23 +00:00
feat: add e2e workspace build duration metric (#21739)
Adds coderd_template_workspace_build_duration_seconds histogram that tracks the full duration from workspace build creation to agent ready. This captures the complete user-perceived build time including provisioning and agent startup. The metric is emitted when the agent reports ready/error/timeout via the lifecycle API, ensuring each build is counted exactly once per replica.
This commit is contained in:
@@ -3886,6 +3886,14 @@ func (q *querier) GetWorkspaceBuildByWorkspaceIDAndBuildNumber(ctx context.Conte
|
||||
return q.db.GetWorkspaceBuildByWorkspaceIDAndBuildNumber(ctx, arg)
|
||||
}
|
||||
|
||||
func (q *querier) GetWorkspaceBuildMetricsByResourceID(ctx context.Context, id uuid.UUID) (database.GetWorkspaceBuildMetricsByResourceIDRow, error) {
|
||||
// Verify access to the resource first.
|
||||
if _, err := q.GetWorkspaceResourceByID(ctx, id); err != nil {
|
||||
return database.GetWorkspaceBuildMetricsByResourceIDRow{}, err
|
||||
}
|
||||
return q.db.GetWorkspaceBuildMetricsByResourceID(ctx, id)
|
||||
}
|
||||
|
||||
func (q *querier) GetWorkspaceBuildParameters(ctx context.Context, workspaceBuildID uuid.UUID) ([]database.WorkspaceBuildParameter, error) {
|
||||
// Authorized call to get the workspace build. If we can read the build,
|
||||
// we can read the params.
|
||||
|
||||
@@ -2036,6 +2036,18 @@ func (s *MethodTestSuite) TestWorkspace() {
|
||||
dbm.EXPECT().GetWorkspaceByID(gomock.Any(), build.WorkspaceID).Return(ws, nil).AnyTimes()
|
||||
check.Args(res.ID).Asserts(ws, policy.ActionRead).Returns(res)
|
||||
}))
|
||||
s.Run("GetWorkspaceBuildMetricsByResourceID", s.Mocked(func(dbm *dbmock.MockStore, faker *gofakeit.Faker, check *expects) {
|
||||
ws := testutil.Fake(s.T(), faker, database.Workspace{})
|
||||
build := testutil.Fake(s.T(), faker, database.WorkspaceBuild{WorkspaceID: ws.ID})
|
||||
job := testutil.Fake(s.T(), faker, database.ProvisionerJob{ID: build.JobID, Type: database.ProvisionerJobTypeWorkspaceBuild})
|
||||
res := testutil.Fake(s.T(), faker, database.WorkspaceResource{JobID: build.JobID})
|
||||
dbm.EXPECT().GetWorkspaceResourceByID(gomock.Any(), res.ID).Return(res, nil).AnyTimes()
|
||||
dbm.EXPECT().GetProvisionerJobByID(gomock.Any(), res.JobID).Return(job, nil).AnyTimes()
|
||||
dbm.EXPECT().GetWorkspaceBuildByJobID(gomock.Any(), res.JobID).Return(build, nil).AnyTimes()
|
||||
dbm.EXPECT().GetWorkspaceByID(gomock.Any(), build.WorkspaceID).Return(ws, nil).AnyTimes()
|
||||
dbm.EXPECT().GetWorkspaceBuildMetricsByResourceID(gomock.Any(), res.ID).Return(database.GetWorkspaceBuildMetricsByResourceIDRow{}, nil).AnyTimes()
|
||||
check.Args(res.ID).Asserts(ws, policy.ActionRead).Returns(database.GetWorkspaceBuildMetricsByResourceIDRow{})
|
||||
}))
|
||||
s.Run("Build/GetWorkspaceResourcesByJobID", s.Mocked(func(dbm *dbmock.MockStore, faker *gofakeit.Faker, check *expects) {
|
||||
ws := testutil.Fake(s.T(), faker, database.Workspace{})
|
||||
build := testutil.Fake(s.T(), faker, database.WorkspaceBuild{WorkspaceID: ws.ID})
|
||||
|
||||
@@ -2406,6 +2406,14 @@ func (m queryMetricsStore) GetWorkspaceBuildByWorkspaceIDAndBuildNumber(ctx cont
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
func (m queryMetricsStore) GetWorkspaceBuildMetricsByResourceID(ctx context.Context, id uuid.UUID) (database.GetWorkspaceBuildMetricsByResourceIDRow, error) {
|
||||
start := time.Now()
|
||||
r0, r1 := m.s.GetWorkspaceBuildMetricsByResourceID(ctx, id)
|
||||
m.queryLatencies.WithLabelValues("GetWorkspaceBuildMetricsByResourceID").Observe(time.Since(start).Seconds())
|
||||
m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "GetWorkspaceBuildMetricsByResourceID").Inc()
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
func (m queryMetricsStore) GetWorkspaceBuildParameters(ctx context.Context, workspaceBuildID uuid.UUID) ([]database.WorkspaceBuildParameter, error) {
|
||||
start := time.Now()
|
||||
r0, r1 := m.s.GetWorkspaceBuildParameters(ctx, workspaceBuildID)
|
||||
|
||||
@@ -4499,6 +4499,21 @@ func (mr *MockStoreMockRecorder) GetWorkspaceBuildByWorkspaceIDAndBuildNumber(ct
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceBuildByWorkspaceIDAndBuildNumber", reflect.TypeOf((*MockStore)(nil).GetWorkspaceBuildByWorkspaceIDAndBuildNumber), ctx, arg)
|
||||
}
|
||||
|
||||
// GetWorkspaceBuildMetricsByResourceID mocks base method.
|
||||
func (m *MockStore) GetWorkspaceBuildMetricsByResourceID(ctx context.Context, id uuid.UUID) (database.GetWorkspaceBuildMetricsByResourceIDRow, error) {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "GetWorkspaceBuildMetricsByResourceID", ctx, id)
|
||||
ret0, _ := ret[0].(database.GetWorkspaceBuildMetricsByResourceIDRow)
|
||||
ret1, _ := ret[1].(error)
|
||||
return ret0, ret1
|
||||
}
|
||||
|
||||
// GetWorkspaceBuildMetricsByResourceID indicates an expected call of GetWorkspaceBuildMetricsByResourceID.
|
||||
func (mr *MockStoreMockRecorder) GetWorkspaceBuildMetricsByResourceID(ctx, id any) *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceBuildMetricsByResourceID", reflect.TypeOf((*MockStore)(nil).GetWorkspaceBuildMetricsByResourceID), ctx, id)
|
||||
}
|
||||
|
||||
// GetWorkspaceBuildParameters mocks base method.
|
||||
func (m *MockStore) GetWorkspaceBuildParameters(ctx context.Context, workspaceBuildID uuid.UUID) ([]database.WorkspaceBuildParameter, error) {
|
||||
m.ctrl.T.Helper()
|
||||
|
||||
@@ -501,6 +501,9 @@ type sqlcQuerier interface {
|
||||
GetWorkspaceBuildByID(ctx context.Context, id uuid.UUID) (WorkspaceBuild, error)
|
||||
GetWorkspaceBuildByJobID(ctx context.Context, jobID uuid.UUID) (WorkspaceBuild, error)
|
||||
GetWorkspaceBuildByWorkspaceIDAndBuildNumber(ctx context.Context, arg GetWorkspaceBuildByWorkspaceIDAndBuildNumberParams) (WorkspaceBuild, error)
|
||||
// Returns build metadata for e2e workspace build duration metrics.
|
||||
// Also checks if all agents are ready and returns the worst status.
|
||||
GetWorkspaceBuildMetricsByResourceID(ctx context.Context, id uuid.UUID) (GetWorkspaceBuildMetricsByResourceIDRow, error)
|
||||
GetWorkspaceBuildParameters(ctx context.Context, workspaceBuildID uuid.UUID) ([]WorkspaceBuildParameter, error)
|
||||
GetWorkspaceBuildParametersByBuildIDs(ctx context.Context, workspaceBuildIds []uuid.UUID) ([]WorkspaceBuildParameter, error)
|
||||
GetWorkspaceBuildStatsByTemplates(ctx context.Context, since time.Time) ([]GetWorkspaceBuildStatsByTemplatesRow, error)
|
||||
|
||||
@@ -21344,6 +21344,62 @@ func (q *sqlQuerier) GetWorkspaceBuildByWorkspaceIDAndBuildNumber(ctx context.Co
|
||||
return i, err
|
||||
}
|
||||
|
||||
const getWorkspaceBuildMetricsByResourceID = `-- name: GetWorkspaceBuildMetricsByResourceID :one
|
||||
SELECT
|
||||
wb.created_at,
|
||||
wb.transition,
|
||||
t.name AS template_name,
|
||||
o.name AS organization_name,
|
||||
(w.owner_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0') AS is_prebuild,
|
||||
-- All agents must have ready_at set (terminal startup state)
|
||||
COUNT(*) FILTER (WHERE wa.ready_at IS NULL) = 0 AS all_agents_ready,
|
||||
-- Latest ready_at across all agents (for duration calculation)
|
||||
MAX(wa.ready_at)::timestamptz AS last_agent_ready_at,
|
||||
-- Worst status: error > timeout > ready
|
||||
CASE
|
||||
WHEN bool_or(wa.lifecycle_state = 'start_error') THEN 'error'
|
||||
WHEN bool_or(wa.lifecycle_state = 'start_timeout') THEN 'timeout'
|
||||
ELSE 'success'
|
||||
END AS worst_status
|
||||
FROM workspace_builds wb
|
||||
JOIN workspaces w ON wb.workspace_id = w.id
|
||||
JOIN templates t ON w.template_id = t.id
|
||||
JOIN organizations o ON t.organization_id = o.id
|
||||
JOIN workspace_resources wr ON wr.job_id = wb.job_id
|
||||
JOIN workspace_agents wa ON wa.resource_id = wr.id
|
||||
WHERE wb.job_id = (SELECT job_id FROM workspace_resources WHERE workspace_resources.id = $1)
|
||||
GROUP BY wb.created_at, wb.transition, t.name, o.name, w.owner_id
|
||||
`
|
||||
|
||||
type GetWorkspaceBuildMetricsByResourceIDRow struct {
|
||||
CreatedAt time.Time `db:"created_at" json:"created_at"`
|
||||
Transition WorkspaceTransition `db:"transition" json:"transition"`
|
||||
TemplateName string `db:"template_name" json:"template_name"`
|
||||
OrganizationName string `db:"organization_name" json:"organization_name"`
|
||||
IsPrebuild bool `db:"is_prebuild" json:"is_prebuild"`
|
||||
AllAgentsReady bool `db:"all_agents_ready" json:"all_agents_ready"`
|
||||
LastAgentReadyAt time.Time `db:"last_agent_ready_at" json:"last_agent_ready_at"`
|
||||
WorstStatus string `db:"worst_status" json:"worst_status"`
|
||||
}
|
||||
|
||||
// Returns build metadata for e2e workspace build duration metrics.
|
||||
// Also checks if all agents are ready and returns the worst status.
|
||||
func (q *sqlQuerier) GetWorkspaceBuildMetricsByResourceID(ctx context.Context, id uuid.UUID) (GetWorkspaceBuildMetricsByResourceIDRow, error) {
|
||||
row := q.db.QueryRowContext(ctx, getWorkspaceBuildMetricsByResourceID, id)
|
||||
var i GetWorkspaceBuildMetricsByResourceIDRow
|
||||
err := row.Scan(
|
||||
&i.CreatedAt,
|
||||
&i.Transition,
|
||||
&i.TemplateName,
|
||||
&i.OrganizationName,
|
||||
&i.IsPrebuild,
|
||||
&i.AllAgentsReady,
|
||||
&i.LastAgentReadyAt,
|
||||
&i.WorstStatus,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
|
||||
const getWorkspaceBuildStatsByTemplates = `-- name: GetWorkspaceBuildStatsByTemplates :many
|
||||
SELECT
|
||||
w.template_id,
|
||||
|
||||
@@ -243,3 +243,31 @@ SET
|
||||
has_external_agent = @has_external_agent,
|
||||
updated_at = @updated_at::timestamptz
|
||||
WHERE id = @id::uuid;
|
||||
|
||||
-- name: GetWorkspaceBuildMetricsByResourceID :one
|
||||
-- Returns build metadata for e2e workspace build duration metrics.
|
||||
-- Also checks if all agents are ready and returns the worst status.
|
||||
SELECT
|
||||
wb.created_at,
|
||||
wb.transition,
|
||||
t.name AS template_name,
|
||||
o.name AS organization_name,
|
||||
(w.owner_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0') AS is_prebuild,
|
||||
-- All agents must have ready_at set (terminal startup state)
|
||||
COUNT(*) FILTER (WHERE wa.ready_at IS NULL) = 0 AS all_agents_ready,
|
||||
-- Latest ready_at across all agents (for duration calculation)
|
||||
MAX(wa.ready_at)::timestamptz AS last_agent_ready_at,
|
||||
-- Worst status: error > timeout > ready
|
||||
CASE
|
||||
WHEN bool_or(wa.lifecycle_state = 'start_error') THEN 'error'
|
||||
WHEN bool_or(wa.lifecycle_state = 'start_timeout') THEN 'timeout'
|
||||
ELSE 'success'
|
||||
END AS worst_status
|
||||
FROM workspace_builds wb
|
||||
JOIN workspaces w ON wb.workspace_id = w.id
|
||||
JOIN templates t ON w.template_id = t.id
|
||||
JOIN organizations o ON t.organization_id = o.id
|
||||
JOIN workspace_resources wr ON wr.job_id = wb.job_id
|
||||
JOIN workspace_agents wa ON wa.resource_id = wr.id
|
||||
WHERE wb.job_id = (SELECT job_id FROM workspace_resources WHERE workspace_resources.id = $1)
|
||||
GROUP BY wb.created_at, wb.transition, t.name, o.name, w.owner_id;
|
||||
|
||||
Reference in New Issue
Block a user