mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
perf: don't call GetUserByID unnecessarily for Agents metrics loops (#19395)
At the moment, the loop which retrieves and updates the values of the agents metrics excessively calls `GetUserByID` (a DB query). First it retrieves a list of all workspaces, filtering out inactive agents (not entirely clear to me whether this is non-running workspaces, or just dead agents), and then iterates over those workspaces to get the rest of the relevant data for the metrics. The next call is `GetUserByID` for `workspace.OwnerID`. This is unnecessary because the `workspaces_visible` view we pull workspaces from has already been joined with the users table to get the username/name/etc. This should at least partially resolve https://github.com/coder/internal/issues/726 --------- Signed-off-by: Callum Styan <callumstyan@gmail.com>
This commit is contained in:
@@ -41,11 +41,12 @@ func TestUpdateStates(t *testing.T) {
|
||||
Name: "tpl",
|
||||
}
|
||||
workspace = database.Workspace{
|
||||
ID: uuid.New(),
|
||||
OwnerID: user.ID,
|
||||
TemplateID: template.ID,
|
||||
Name: "xyz",
|
||||
TemplateName: template.Name,
|
||||
ID: uuid.New(),
|
||||
OwnerID: user.ID,
|
||||
OwnerUsername: user.Username,
|
||||
TemplateID: template.ID,
|
||||
Name: "xyz",
|
||||
TemplateName: template.Name,
|
||||
}
|
||||
agent = database.WorkspaceAgent{
|
||||
ID: uuid.New(),
|
||||
@@ -138,9 +139,6 @@ func TestUpdateStates(t *testing.T) {
|
||||
// Workspace gets fetched.
|
||||
dbM.EXPECT().GetWorkspaceByAgentID(gomock.Any(), agent.ID).Return(workspace, nil)
|
||||
|
||||
// User gets fetched to hit the UpdateAgentMetricsFn.
|
||||
dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil)
|
||||
|
||||
// We expect an activity bump because ConnectionCount > 0.
|
||||
dbM.EXPECT().ActivityBumpWorkspace(gomock.Any(), database.ActivityBumpWorkspaceParams{
|
||||
WorkspaceID: workspace.ID,
|
||||
@@ -380,9 +378,6 @@ func TestUpdateStates(t *testing.T) {
|
||||
LastUsedAt: now.UTC(),
|
||||
}).Return(nil)
|
||||
|
||||
// User gets fetched to hit the UpdateAgentMetricsFn.
|
||||
dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil)
|
||||
|
||||
resp, err := api.UpdateStats(context.Background(), req)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, &agentproto.UpdateStatsResponse{
|
||||
@@ -498,9 +493,6 @@ func TestUpdateStates(t *testing.T) {
|
||||
LastUsedAt: now,
|
||||
}).Return(nil)
|
||||
|
||||
// User gets fetched to hit the UpdateAgentMetricsFn.
|
||||
dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil)
|
||||
|
||||
// Ensure that pubsub notifications are sent.
|
||||
notifyDescription := make(chan struct{})
|
||||
ps.SubscribeWithErr(wspubsub.WorkspaceEventChannel(workspace.OwnerID),
|
||||
|
||||
@@ -328,29 +328,24 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
|
||||
templateVersionName = "unknown"
|
||||
}
|
||||
|
||||
user, err := db.GetUserByID(ctx, workspace.OwnerID)
|
||||
if err != nil {
|
||||
logger.Error(ctx, "can't get user from the database", slog.F("user_id", workspace.OwnerID), slog.Error(err))
|
||||
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
|
||||
continue
|
||||
}
|
||||
// username :=
|
||||
|
||||
agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, workspace.ID)
|
||||
if err != nil {
|
||||
logger.Error(ctx, "can't get workspace agents", slog.F("workspace_id", workspace.ID), slog.Error(err))
|
||||
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
|
||||
agentsGauge.WithLabelValues(VectorOperationAdd, 0, workspace.OwnerUsername, workspace.Name, templateName, templateVersionName)
|
||||
continue
|
||||
}
|
||||
|
||||
if len(agents) == 0 {
|
||||
logger.Debug(ctx, "workspace agents are unavailable", slog.F("workspace_id", workspace.ID))
|
||||
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
|
||||
agentsGauge.WithLabelValues(VectorOperationAdd, 0, workspace.OwnerUsername, workspace.Name, templateName, templateVersionName)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, agent := range agents {
|
||||
// Collect information about agents
|
||||
agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name, templateName, templateVersionName)
|
||||
agentsGauge.WithLabelValues(VectorOperationAdd, 1, workspace.OwnerUsername, workspace.Name, templateName, templateVersionName)
|
||||
|
||||
connectionStatus := agent.Status(agentInactiveDisconnectTimeout)
|
||||
node := (*coordinator.Load()).Node(agent.ID)
|
||||
@@ -360,7 +355,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
|
||||
tailnetNode = node.ID.String()
|
||||
}
|
||||
|
||||
agentsConnectionsGauge.WithLabelValues(VectorOperationSet, 1, agent.Name, user.Username, workspace.Name, string(connectionStatus.Status), string(agent.LifecycleState), tailnetNode)
|
||||
agentsConnectionsGauge.WithLabelValues(VectorOperationSet, 1, agent.Name, workspace.OwnerUsername, workspace.Name, string(connectionStatus.Status), string(agent.LifecycleState), tailnetNode)
|
||||
|
||||
if node == nil {
|
||||
logger.Debug(ctx, "can't read in-memory node for agent", slog.F("agent_id", agent.ID))
|
||||
@@ -385,7 +380,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
|
||||
}
|
||||
}
|
||||
|
||||
agentsConnectionLatenciesGauge.WithLabelValues(VectorOperationSet, latency, agent.Name, user.Username, workspace.Name, region.RegionName, fmt.Sprintf("%v", node.PreferredDERP == regionID))
|
||||
agentsConnectionLatenciesGauge.WithLabelValues(VectorOperationSet, latency, agent.Name, workspace.OwnerUsername, workspace.Name, region.RegionName, fmt.Sprintf("%v", node.PreferredDERP == regionID))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -397,7 +392,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
|
||||
}
|
||||
|
||||
for _, app := range apps {
|
||||
agentsAppsGauge.WithLabelValues(VectorOperationAdd, 1, agent.Name, user.Username, workspace.Name, app.DisplayName, string(app.Health))
|
||||
agentsAppsGauge.WithLabelValues(VectorOperationAdd, 1, agent.Name, workspace.OwnerUsername, workspace.Name, app.DisplayName, string(app.Health))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -126,13 +126,8 @@ func (r *Reporter) ReportAgentStats(ctx context.Context, now time.Time, workspac
|
||||
|
||||
// update prometheus metrics
|
||||
if r.opts.UpdateAgentMetricsFn != nil {
|
||||
user, err := r.opts.Database.GetUserByID(ctx, workspace.OwnerID)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("get user: %w", err)
|
||||
}
|
||||
|
||||
r.opts.UpdateAgentMetricsFn(ctx, prometheusmetrics.AgentMetricLabels{
|
||||
Username: user.Username,
|
||||
Username: workspace.OwnerUsername,
|
||||
WorkspaceName: workspace.Name,
|
||||
AgentName: workspaceAgent.Name,
|
||||
TemplateName: templateName,
|
||||
|
||||
Reference in New Issue
Block a user