Skip to content

Commit 014a2d5

Browse files
authored
perf: don't call GetUserByID unnecessarily for Agents metrics loops (#19395)
At the moment, the loop which retrieves and updates the values of the agents metrics excessively calls `GetUserByID` (a DB query). First it retrieves a list of all workspaces, filtering out inactive agents (not entirely clear to me whether this is non-running workspaces, or just dead agents), and then iterates over those workspaces to get the rest of the relevant data for the metrics. The next call is `GetUserByID` for `workspace.OwnerID`. This is unnecessary because the `workspaces_visible` view we pull workspaces from has already been joined with the users table to get the username/name/etc. This should at least partially resolve coder/internal#726 --------- Signed-off-by: Callum Styan <callumstyan@gmail.com>
1 parent 8aafbcb commit 014a2d5

File tree

3 files changed

+14
-32
lines changed

3 files changed

+14
-32
lines changed

coderd/agentapi/stats_test.go

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,12 @@ func TestUpdateStates(t *testing.T) {
4141
Name: "tpl",
4242
}
4343
workspace = database.Workspace{
44-
ID: uuid.New(),
45-
OwnerID: user.ID,
46-
TemplateID: template.ID,
47-
Name: "xyz",
48-
TemplateName: template.Name,
44+
ID: uuid.New(),
45+
OwnerID: user.ID,
46+
OwnerUsername: user.Username,
47+
TemplateID: template.ID,
48+
Name: "xyz",
49+
TemplateName: template.Name,
4950
}
5051
agent = database.WorkspaceAgent{
5152
ID: uuid.New(),
@@ -138,9 +139,6 @@ func TestUpdateStates(t *testing.T) {
138139
// Workspace gets fetched.
139140
dbM.EXPECT().GetWorkspaceByAgentID(gomock.Any(), agent.ID).Return(workspace, nil)
140141

141-
// User gets fetched to hit the UpdateAgentMetricsFn.
142-
dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil)
143-
144142
// We expect an activity bump because ConnectionCount > 0.
145143
dbM.EXPECT().ActivityBumpWorkspace(gomock.Any(), database.ActivityBumpWorkspaceParams{
146144
WorkspaceID: workspace.ID,
@@ -380,9 +378,6 @@ func TestUpdateStates(t *testing.T) {
380378
LastUsedAt: now.UTC(),
381379
}).Return(nil)
382380

383-
// User gets fetched to hit the UpdateAgentMetricsFn.
384-
dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil)
385-
386381
resp, err := api.UpdateStats(context.Background(), req)
387382
require.NoError(t, err)
388383
require.Equal(t, &agentproto.UpdateStatsResponse{
@@ -498,9 +493,6 @@ func TestUpdateStates(t *testing.T) {
498493
LastUsedAt: now,
499494
}).Return(nil)
500495

501-
// User gets fetched to hit the UpdateAgentMetricsFn.
502-
dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil)
503-
504496
// Ensure that pubsub notifications are sent.
505497
notifyDescription := make(chan struct{})
506498
ps.SubscribeWithErr(wspubsub.WorkspaceEventChannel(workspace.OwnerID),

coderd/prometheusmetrics/prometheusmetrics.go

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -328,29 +328,24 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
328328
templateVersionName = "unknown"
329329
}
330330

331-
user, err := db.GetUserByID(ctx, workspace.OwnerID)
332-
if err != nil {
333-
logger.Error(ctx, "can't get user from the database", slog.F("user_id", workspace.OwnerID), slog.Error(err))
334-
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
335-
continue
336-
}
331+
// username :=
337332

338333
agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, workspace.ID)
339334
if err != nil {
340335
logger.Error(ctx, "can't get workspace agents", slog.F("workspace_id", workspace.ID), slog.Error(err))
341-
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
336+
agentsGauge.WithLabelValues(VectorOperationAdd, 0, workspace.OwnerUsername, workspace.Name, templateName, templateVersionName)
342337
continue
343338
}
344339

345340
if len(agents) == 0 {
346341
logger.Debug(ctx, "workspace agents are unavailable", slog.F("workspace_id", workspace.ID))
347-
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
342+
agentsGauge.WithLabelValues(VectorOperationAdd, 0, workspace.OwnerUsername, workspace.Name, templateName, templateVersionName)
348343
continue
349344
}
350345

351346
for _, agent := range agents {
352347
// Collect information about agents
353-
agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name, templateName, templateVersionName)
348+
agentsGauge.WithLabelValues(VectorOperationAdd, 1, workspace.OwnerUsername, workspace.Name, templateName, templateVersionName)
354349

355350
connectionStatus := agent.Status(agentInactiveDisconnectTimeout)
356351
node := (*coordinator.Load()).Node(agent.ID)
@@ -360,7 +355,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
360355
tailnetNode = node.ID.String()
361356
}
362357

363-
agentsConnectionsGauge.WithLabelValues(VectorOperationSet, 1, agent.Name, user.Username, workspace.Name, string(connectionStatus.Status), string(agent.LifecycleState), tailnetNode)
358+
agentsConnectionsGauge.WithLabelValues(VectorOperationSet, 1, agent.Name, workspace.OwnerUsername, workspace.Name, string(connectionStatus.Status), string(agent.LifecycleState), tailnetNode)
364359

365360
if node == nil {
366361
logger.Debug(ctx, "can't read in-memory node for agent", slog.F("agent_id", agent.ID))
@@ -385,7 +380,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
385380
}
386381
}
387382

388-
agentsConnectionLatenciesGauge.WithLabelValues(VectorOperationSet, latency, agent.Name, user.Username, workspace.Name, region.RegionName, fmt.Sprintf("%v", node.PreferredDERP == regionID))
383+
agentsConnectionLatenciesGauge.WithLabelValues(VectorOperationSet, latency, agent.Name, workspace.OwnerUsername, workspace.Name, region.RegionName, fmt.Sprintf("%v", node.PreferredDERP == regionID))
389384
}
390385
}
391386

@@ -397,7 +392,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
397392
}
398393

399394
for _, app := range apps {
400-
agentsAppsGauge.WithLabelValues(VectorOperationAdd, 1, agent.Name, user.Username, workspace.Name, app.DisplayName, string(app.Health))
395+
agentsAppsGauge.WithLabelValues(VectorOperationAdd, 1, agent.Name, workspace.OwnerUsername, workspace.Name, app.DisplayName, string(app.Health))
401396
}
402397
}
403398
}

coderd/workspacestats/reporter.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,8 @@ func (r *Reporter) ReportAgentStats(ctx context.Context, now time.Time, workspac
126126

127127
// update prometheus metrics
128128
if r.opts.UpdateAgentMetricsFn != nil {
129-
user, err := r.opts.Database.GetUserByID(ctx, workspace.OwnerID)
130-
if err != nil {
131-
return xerrors.Errorf("get user: %w", err)
132-
}
133-
134129
r.opts.UpdateAgentMetricsFn(ctx, prometheusmetrics.AgentMetricLabels{
135-
Username: user.Username,
130+
Username: workspace.OwnerUsername,
136131
WorkspaceName: workspace.Name,
137132
AgentName: workspaceAgent.Name,
138133
TemplateName: templateName,

0 commit comments

Comments
 (0)