feat: add WatchAllWorkspaceBuilds endpoint for autostart scaletests (#22057)

This PR adds a `WatchAllWorkspaceBuilds` function with a `watch-all-workspaces`
endpoint, which can be used to listen on a single global pubsub channel
for _all_ workspace build updates, and makes use of it in the autostart
scaletest.

This negates the need to use a workspace watch pubsub channel _per_
workspace, which has auth overhead associated with each call. This is
especially relevant in situations such as the autostart scaletest, where
we need to start/stop a set of workspaces before we can configure their
autostart config. The overhead associated with all the watch requests
skews the scaletest results and makes it harder to reason about the
performance of the autostart feature itself.

The autostart scaletest also no longer generates its own metrics nor
does it wait for all the workspaces to actually start via autostart. We
should update the scaletest dashboard after both PRs are merged to
measure autostart performance via the new metrics.



The new function/endpoint and its usage in the autostart scaletest are
gated behind an experiment feature flag. We should discuss whether we
want to enable the endpoint in prod by default; if so, we can remove
the experiment.

---------

Signed-off-by: Callum Styan <callumstyan@gmail.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Callum Styan <callum@coder.com>
This commit is contained in:
Callum Styan
2026-03-13 20:37:41 -07:00
committed by GitHub
parent b492c42624
commit 36665e17b2
23 changed files with 1296 additions and 249 deletions
+107
View File
@@ -3674,6 +3674,113 @@ func TestWorkspaceWatcher(t *testing.T) {
wait("second is for the build cancel", nil)
}
// TestWatchAllWorkspaceBuilds subscribes to the global workspace build update
// stream and checks that a matching "succeeded" update is delivered for the
// initial build, a stop, and a restart of two separate workspaces.
func TestWatchAllWorkspaceBuilds(t *testing.T) {
	t.Parallel()

	// The endpoint is only available with the workspace build updates
	// experiment turned on.
	deploymentValues := coderdtest.DeploymentValues(t, func(dv *codersdk.DeploymentValues) {
		dv.Experiments = []string{string(codersdk.ExperimentWorkspaceBuildUpdates)}
	})
	client, closer := coderdtest.NewWithProvisionerCloser(t, &coderdtest.Options{
		IncludeProvisionerDaemon: true,
		DeploymentValues:         deploymentValues,
	})
	defer closer.Close()
	user := coderdtest.CreateFirstUser(t, client)

	// A minimal template version exposing a single resource.
	graph := &proto.Response{
		Type: &proto.Response_Graph{
			Graph: &proto.GraphComplete{
				Resources: []*proto.Resource{{
					Name: "example",
					Type: "aws_instance",
				}},
			},
		},
	}
	version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
		Parse:          echo.ParseComplete,
		ProvisionPlan:  echo.PlanComplete,
		ProvisionGraph: []*proto.Response{graph},
	})
	coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
	template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)

	ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
	defer cancel()

	// Open the SSE subscription before any workspace exists so that the
	// initial builds can be awaited through the same stream.
	decoder, err := client.WatchAllWorkspaceBuilds(ctx)
	require.NoError(t, err)
	defer decoder.Close()
	updates := decoder.Chan()
	logger := testutil.Logger(t).Named(t.Name())

	// awaitUpdate drains the stream until an update matching the workspace,
	// transition and job status arrives. Every received update is logged, and
	// non-matching ones are skipped. It fails the test if ctx expires or the
	// channel is closed first.
	awaitUpdate := func(event string, workspaceID uuid.UUID, expectedTransition, expectedStatus string) codersdk.WorkspaceBuildUpdate {
		t.Helper()
		for {
			var received codersdk.WorkspaceBuildUpdate
			select {
			case next, open := <-updates:
				if !open {
					require.FailNow(t, "updates channel closed", event)
					return codersdk.WorkspaceBuildUpdate{}
				}
				received = next
			case <-ctx.Done():
				require.FailNow(t, "timed out waiting for event", event)
				return codersdk.WorkspaceBuildUpdate{}
			}

			logger.Info(ctx, "received workspace build update",
				slog.F("event", event),
				slog.F("workspace_id", received.WorkspaceID),
				slog.F("build_id", received.BuildID),
				slog.F("transition", received.Transition),
				slog.F("job_status", received.JobStatus),
				slog.F("build_number", received.BuildNumber))

			matched := received.WorkspaceID == workspaceID &&
				received.Transition == expectedTransition &&
				received.JobStatus == expectedStatus
			if matched {
				return received
			}

			// Not the update we want; log it and keep reading.
			logger.Info(ctx, "skipping update, not matching expected",
				slog.F("expected_workspace_id", workspaceID),
				slog.F("expected_transition", expectedTransition),
				slog.F("expected_status", expectedStatus))
		}
	}

	// Create both workspaces, awaiting each initial build on the stream.
	workspace1 := coderdtest.CreateWorkspace(t, client, template.ID)
	got := awaitUpdate("workspace1 initial build", workspace1.ID, "start", "succeeded")
	require.Equal(t, workspace1.ID, got.WorkspaceID)
	require.Equal(t, int32(1), got.BuildNumber)

	workspace2 := coderdtest.CreateWorkspace(t, client, template.ID)
	got = awaitUpdate("workspace2 initial build", workspace2.ID, "start", "succeeded")
	require.Equal(t, workspace2.ID, got.WorkspaceID)
	require.Equal(t, int32(1), got.BuildNumber)

	// Stop both workspaces, then start both again. Each step triggers one
	// build and waits for its successful update before moving on.
	steps := []struct {
		event       string
		workspaceID uuid.UUID
		transition  string
		trigger     func()
	}{
		{
			event:       "workspace1 stop",
			workspaceID: workspace1.ID,
			transition:  "stop",
			trigger: func() {
				_ = coderdtest.CreateWorkspaceBuild(t, client, workspace1, database.WorkspaceTransitionStop)
			},
		},
		{
			event:       "workspace2 stop",
			workspaceID: workspace2.ID,
			transition:  "stop",
			trigger: func() {
				_ = coderdtest.CreateWorkspaceBuild(t, client, workspace2, database.WorkspaceTransitionStop)
			},
		},
		{
			event:       "workspace1 start",
			workspaceID: workspace1.ID,
			transition:  "start",
			trigger: func() {
				_ = coderdtest.CreateWorkspaceBuild(t, client, workspace1, database.WorkspaceTransitionStart)
			},
		},
		{
			event:       "workspace2 start",
			workspaceID: workspace2.ID,
			transition:  "start",
			trigger: func() {
				_ = coderdtest.CreateWorkspaceBuild(t, client, workspace2, database.WorkspaceTransitionStart)
			},
		},
	}
	for _, step := range steps {
		step.trigger()
		got = awaitUpdate(step.event, step.workspaceID, step.transition, "succeeded")
		require.Equal(t, step.workspaceID, got.WorkspaceID)
	}
}
func mustLocation(t *testing.T, location string) *time.Location {
t.Helper()
loc, err := time.LoadLocation(location)