perf: don't call GetUserByID unnecessarily for Agents metrics loops (#19395)

At the moment, the loop that retrieves and updates the agent metrics
calls `GetUserByID` (a DB query) far more often than necessary. First it
retrieves a list of all workspaces, filtering out inactive agents (it is
not entirely clear to me whether this means non-running workspaces or
just dead agents), and then iterates over those workspaces to get the
rest of the relevant data for the metrics. The next call is
`GetUserByID` for `workspace.OwnerID`. This is unnecessary because the
`workspaces_visible` view we pull workspaces from has already been joined
with the users table to get the username/name/etc.

This should at least partially resolve
https://github.com/coder/internal/issues/726 
---------

Signed-off-by: Callum Styan <callumstyan@gmail.com>
This commit is contained in:
Callum Styan
2025-08-21 11:01:32 -07:00
committed by GitHub
parent 8aafbcb3be
commit 014a2d5b0f
3 changed files with 14 additions and 32 deletions
+6 -14
View File
@@ -41,11 +41,12 @@ func TestUpdateStates(t *testing.T) {
Name: "tpl",
}
workspace = database.Workspace{
ID: uuid.New(),
OwnerID: user.ID,
TemplateID: template.ID,
Name: "xyz",
TemplateName: template.Name,
ID: uuid.New(),
OwnerID: user.ID,
OwnerUsername: user.Username,
TemplateID: template.ID,
Name: "xyz",
TemplateName: template.Name,
}
agent = database.WorkspaceAgent{
ID: uuid.New(),
@@ -138,9 +139,6 @@ func TestUpdateStates(t *testing.T) {
// Workspace gets fetched.
dbM.EXPECT().GetWorkspaceByAgentID(gomock.Any(), agent.ID).Return(workspace, nil)
// User gets fetched to hit the UpdateAgentMetricsFn.
dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil)
// We expect an activity bump because ConnectionCount > 0.
dbM.EXPECT().ActivityBumpWorkspace(gomock.Any(), database.ActivityBumpWorkspaceParams{
WorkspaceID: workspace.ID,
@@ -380,9 +378,6 @@ func TestUpdateStates(t *testing.T) {
LastUsedAt: now.UTC(),
}).Return(nil)
// User gets fetched to hit the UpdateAgentMetricsFn.
dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil)
resp, err := api.UpdateStats(context.Background(), req)
require.NoError(t, err)
require.Equal(t, &agentproto.UpdateStatsResponse{
@@ -498,9 +493,6 @@ func TestUpdateStates(t *testing.T) {
LastUsedAt: now,
}).Return(nil)
// User gets fetched to hit the UpdateAgentMetricsFn.
dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil)
// Ensure that pubsub notifications are sent.
notifyDescription := make(chan struct{})
ps.SubscribeWithErr(wspubsub.WorkspaceEventChannel(workspace.OwnerID),
+7 -12
View File
@@ -328,29 +328,24 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
templateVersionName = "unknown"
}
user, err := db.GetUserByID(ctx, workspace.OwnerID)
if err != nil {
logger.Error(ctx, "can't get user from the database", slog.F("user_id", workspace.OwnerID), slog.Error(err))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
continue
}
// username :=
agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, workspace.ID)
if err != nil {
logger.Error(ctx, "can't get workspace agents", slog.F("workspace_id", workspace.ID), slog.Error(err))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
agentsGauge.WithLabelValues(VectorOperationAdd, 0, workspace.OwnerUsername, workspace.Name, templateName, templateVersionName)
continue
}
if len(agents) == 0 {
logger.Debug(ctx, "workspace agents are unavailable", slog.F("workspace_id", workspace.ID))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
agentsGauge.WithLabelValues(VectorOperationAdd, 0, workspace.OwnerUsername, workspace.Name, templateName, templateVersionName)
continue
}
for _, agent := range agents {
// Collect information about agents
agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name, templateName, templateVersionName)
agentsGauge.WithLabelValues(VectorOperationAdd, 1, workspace.OwnerUsername, workspace.Name, templateName, templateVersionName)
connectionStatus := agent.Status(agentInactiveDisconnectTimeout)
node := (*coordinator.Load()).Node(agent.ID)
@@ -360,7 +355,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
tailnetNode = node.ID.String()
}
agentsConnectionsGauge.WithLabelValues(VectorOperationSet, 1, agent.Name, user.Username, workspace.Name, string(connectionStatus.Status), string(agent.LifecycleState), tailnetNode)
agentsConnectionsGauge.WithLabelValues(VectorOperationSet, 1, agent.Name, workspace.OwnerUsername, workspace.Name, string(connectionStatus.Status), string(agent.LifecycleState), tailnetNode)
if node == nil {
logger.Debug(ctx, "can't read in-memory node for agent", slog.F("agent_id", agent.ID))
@@ -385,7 +380,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
}
}
agentsConnectionLatenciesGauge.WithLabelValues(VectorOperationSet, latency, agent.Name, user.Username, workspace.Name, region.RegionName, fmt.Sprintf("%v", node.PreferredDERP == regionID))
agentsConnectionLatenciesGauge.WithLabelValues(VectorOperationSet, latency, agent.Name, workspace.OwnerUsername, workspace.Name, region.RegionName, fmt.Sprintf("%v", node.PreferredDERP == regionID))
}
}
@@ -397,7 +392,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
}
for _, app := range apps {
agentsAppsGauge.WithLabelValues(VectorOperationAdd, 1, agent.Name, user.Username, workspace.Name, app.DisplayName, string(app.Health))
agentsAppsGauge.WithLabelValues(VectorOperationAdd, 1, agent.Name, workspace.OwnerUsername, workspace.Name, app.DisplayName, string(app.Health))
}
}
}
+1 -6
View File
@@ -126,13 +126,8 @@ func (r *Reporter) ReportAgentStats(ctx context.Context, now time.Time, workspac
// update prometheus metrics
if r.opts.UpdateAgentMetricsFn != nil {
user, err := r.opts.Database.GetUserByID(ctx, workspace.OwnerID)
if err != nil {
return xerrors.Errorf("get user: %w", err)
}
r.opts.UpdateAgentMetricsFn(ctx, prometheusmetrics.AgentMetricLabels{
Username: user.Username,
Username: workspace.OwnerUsername,
WorkspaceName: workspace.Name,
AgentName: workspaceAgent.Name,
TemplateName: templateName,