Files
coder/support/support_test.go
T
Rowan Smith b163b4c950 feat: support bundle updates to enable pprof and telemetry collection (#21486)
- Adds pprof collection support now that we have the listeners
automatically starting (requires Coder server 2.28.0+, includes a
version check). Collects heap, allocs, profile (30s), block, mutex,
goroutine, threadcreate, trace (30s), cmdline, symbol. The capture runs
for 30 seconds, and a log line is emitted to say so. Enable capture by
supplying the `--pprof` flag or `CODER_SUPPORT_BUNDLE_PPROF` env var.
pprof data is collected from both coderd and the Coder agent.
- Adds collection of Prometheus metrics, also requires 2.28.0+
- Adds the ability to include a template in the bundle without having to
specify a running workspace, via the `--template` flag or the
`CODER_SUPPORT_BUNDLE_TEMPLATE` env var
- Captures a list of workspaces the user has access to. Defaults to a
max of 10, configurable via `--workspaces-total-cap` /
`CODER_SUPPORT_BUNDLE_WORKSPACES_TOTAL_CAP`
- Collects additional stats from the coderd deployment (aggregated
workspace/session metrics), as well as entitlements via license and
dismissed health checks.

created with help from mux
2026-01-20 10:28:52 +11:00

280 lines
12 KiB
Go

package support_test
import (
"bytes"
"context"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"cdr.dev/slog/v3"
"cdr.dev/slog/v3/sloggers/sloghuman"
"cdr.dev/slog/v3/sloggers/slogtest"
"github.com/coder/coder/v2/agent"
"github.com/coder/coder/v2/agent/agenttest"
"github.com/coder/coder/v2/coderd/coderdtest"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbfake"
"github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/support"
"github.com/coder/coder/v2/testutil"
"github.com/coder/serpent"
)
func TestMain(m *testing.M) {
goleak.VerifyTestMain(m, testutil.GoleakOptions...)
}
// TestRun exercises support.Run against a coderdtest deployment in four
// scenarios:
//
//   - OK: a workspace and agent are supplied; every bundle section is expected
//     to be populated and sanitized, and pprof data must be absent.
//   - OK_NoWorkspace: no workspace or agent is supplied; deployment and network
//     sections are populated while workspace and agent sections stay empty.
//   - NoAuth: no first user is created, so the run must fail with an HTTP 401
//     codersdk.Error while still returning a bundle that contains logs.
//   - MissingPrivilege: a regular member runs the bundle and must receive a
//     "failed authorization check" error, again with a bundle containing logs.
func TestRun(t *testing.T) {
	t.Parallel()

	t.Run("OK", func(t *testing.T) {
		t.Parallel()
		cfg := coderdtest.DeploymentValues(t)
		promPort := testutil.RandomPort(t)
		cfg.Prometheus.Enable = serpent.Bool(true)
		cfg.Prometheus.Address.Host = "127.0.0.1"
		cfg.Prometheus.Address.Port = fmt.Sprintf("%d", promPort)
		cfg.Experiments = []string{"foo"}
		ctx := testutil.Context(t, testutil.WaitLong)
		client, db := coderdtest.NewWithDatabase(t, &coderdtest.Options{
			DeploymentValues: cfg,
			Logger:           ptr.Ref(slog.Make(sloghuman.Sink(io.Discard))),
		})
		admin := coderdtest.CreateFirstUser(t, client)
		ws, agt := setupWorkspaceAndAgent(ctx, t, client, db, admin)

		bun, err := support.Run(ctx, &support.Deps{
			Client:      client,
			Log:         testutil.Logger(t).Named("bundle"),
			WorkspaceID: ws.ID,
			AgentID:     agt.ID,
		})
		require.NoError(t, err)
		// bun is dereferenced by every check below, so nil must be fatal.
		requireNotNilNotEmpty(t, bun, "bundle should be present")

		// Deployment section.
		assertNotNilNotEmpty(t, bun.Deployment.BuildInfo, "deployment build info should be present")
		// Config is dereferenced right after this check, so nil must be fatal.
		requireNotNilNotEmpty(t, bun.Deployment.Config, "deployment config should be present")
		assertNotNilNotEmpty(t, bun.Deployment.Config.Options, "deployment config options should be present")
		assertSanitizedDeploymentConfig(t, bun.Deployment.Config)
		assertNotNilNotEmpty(t, bun.Deployment.HealthReport, "deployment health report should be present")
		assertNotNilNotEmpty(t, bun.Deployment.Experiments, "deployment experiments should be present")
		require.NotNil(t, bun.Deployment.Licenses, "license status should be present")

		// Network section.
		assertNotNilNotEmpty(t, bun.Network.ConnectionInfo, "agent connection info should be present")
		assertNotNilNotEmpty(t, bun.Network.CoordinatorDebug, "network coordinator debug should be present")
		assertNotNilNotEmpty(t, bun.Network.Netcheck, "network netcheck should be present")
		assertNotNilNotEmpty(t, bun.Network.TailnetDebug, "network tailnet debug should be present")
		assertNotNilNotEmpty(t, bun.Network.Interfaces, "network interfaces health should be present")

		// Workspace section.
		assertNotNilNotEmpty(t, bun.Workspace.Workspace, "workspace should be present")
		assertSanitizedWorkspace(t, bun.Workspace.Workspace)
		assertNotNilNotEmpty(t, bun.Workspace.BuildLogs, "workspace build logs should be present")
		assertNotNilNotEmpty(t, bun.Workspace.Template, "workspace template should be present")
		assertNotNilNotEmpty(t, bun.Workspace.TemplateVersion, "workspace template version should be present")
		assertNotNilNotEmpty(t, bun.Workspace.TemplateFileBase64, "workspace template file should be present")
		require.NotNil(t, bun.Workspace.Parameters, "workspace parameters should be present")

		// Agent section. Agent and Manifest are dereferenced immediately after
		// their presence checks, so those checks are fatal on nil.
		requireNotNilNotEmpty(t, bun.Agent.Agent, "agent should be present")
		assertSanitizedEnv(t, bun.Agent.Agent.EnvironmentVariables)
		assertNotNilNotEmpty(t, bun.Agent.ListeningPorts, "agent listening ports should be present")
		assertNotNilNotEmpty(t, bun.Agent.Logs, "agent logs should be present")
		requireNotNilNotEmpty(t, bun.Agent.Manifest, "agent manifest should be present")
		assertSanitizedEnv(t, bun.Agent.Manifest.EnvironmentVariables)
		assertNotNilNotEmpty(t, bun.Agent.AgentMagicsockHTML, "agent magicsock should be present")
		assertNotNilNotEmpty(t, bun.Agent.ClientMagicsockHTML, "client magicsock should be present")
		assertNotNilNotEmpty(t, bun.Agent.PeerDiagnostics, "agent peer diagnostics should be present")
		assertNotNilNotEmpty(t, bun.Agent.PingResult, "agent ping result should be present")
		assertNotNilNotEmpty(t, bun.Agent.Prometheus, "agent prometheus metrics should be present")
		assertNotNilNotEmpty(t, bun.Deployment.Prometheus, "deployment prometheus metrics should be present")
		assertNotNilNotEmpty(t, bun.Agent.StartupLogs, "agent startup logs should be present")
		assertNotNilNotEmpty(t, bun.Logs, "bundle logs should be present")

		// pprof collection is opt-in and was not requested here.
		assert.Nil(t, bun.Pprof.Server, "server pprof should not be collected without CollectPprof")
		assert.Nil(t, bun.Pprof.Agent, "agent pprof should not be collected without CollectPprof")

		// Deployment health settings should be present.
		assertNotNilNotEmpty(t, bun.Deployment.HealthSettings, "deployment health settings should be present")

		// Aggregated workspaces should be present and include the created
		// workspace. The value is dereferenced below, so nil must be fatal.
		require.NotNil(t, bun.Deployment.Workspaces, "deployment workspaces should be present")
		assert.GreaterOrEqual(t, bun.Deployment.Workspaces.Count, 1)
		for _, aws := range bun.Deployment.Workspaces.Workspaces {
			for _, res := range aws.LatestBuild.Resources {
				for _, a := range res.Agents {
					assertSanitizedEnv(t, a.EnvironmentVariables)
				}
			}
		}
	})

	t.Run("OK_NoWorkspace", func(t *testing.T) {
		t.Parallel()
		cfg := coderdtest.DeploymentValues(t)
		cfg.Experiments = []string{"foo"}
		ctx := testutil.Context(t, testutil.WaitLong)
		client := coderdtest.New(t, &coderdtest.Options{
			DeploymentValues: cfg,
			Logger:           ptr.Ref(slog.Make(sloghuman.Sink(io.Discard))),
		})
		_ = coderdtest.CreateFirstUser(t, client)

		bun, err := support.Run(ctx, &support.Deps{
			Client: client,
			Log:    slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Named("bundle").Leveled(slog.LevelDebug),
		})
		require.NoError(t, err)
		// bun is dereferenced by every check below, so nil must be fatal.
		requireNotNilNotEmpty(t, bun, "bundle should be present")

		assertNotNilNotEmpty(t, bun.Deployment.BuildInfo, "deployment build info should be present")
		// Config is dereferenced right after this check, so nil must be fatal.
		requireNotNilNotEmpty(t, bun.Deployment.Config, "deployment config should be present")
		assertNotNilNotEmpty(t, bun.Deployment.Config.Options, "deployment config options should be present")
		assertSanitizedDeploymentConfig(t, bun.Deployment.Config)
		assertNotNilNotEmpty(t, bun.Deployment.HealthReport, "deployment health report should be present")
		assertNotNilNotEmpty(t, bun.Deployment.Experiments, "deployment experiments should be present")
		assertNotNilNotEmpty(t, bun.Network.ConnectionInfo, "agent connection info should be present")
		assertNotNilNotEmpty(t, bun.Network.CoordinatorDebug, "network coordinator debug should be present")
		assertNotNilNotEmpty(t, bun.Network.Netcheck, "network netcheck should be present")
		assertNotNilNotEmpty(t, bun.Network.TailnetDebug, "network tailnet debug should be present")
		assertNotNilNotEmpty(t, bun.Network.Interfaces, "network interfaces health should be present")

		// No workspace or agent was requested, so those sections stay empty.
		assert.Empty(t, bun.Workspace.Workspace, "did not expect workspace to be present")
		assert.Empty(t, bun.Agent, "did not expect agent to be present")
		assertNotNilNotEmpty(t, bun.Logs, "bundle logs should be present")
		assert.Nil(t, bun.Pprof.Server, "server pprof should not be collected without CollectPprof")
		assert.Nil(t, bun.Pprof.Agent, "agent pprof should not be collected without CollectPprof")

		// Health settings should be present even without workspace context.
		assertNotNilNotEmpty(t, bun.Deployment.HealthSettings, "deployment health settings should be present")
		// The aggregated workspaces value should exist (it may be empty).
		assert.NotNil(t, bun.Deployment.Workspaces)
	})

	t.Run("NoAuth", func(t *testing.T) {
		t.Parallel()
		ctx := testutil.Context(t, testutil.WaitLong)
		// No first user is created, so the client carries no credentials.
		client := coderdtest.New(t, &coderdtest.Options{
			Logger: ptr.Ref(slog.Make(sloghuman.Sink(io.Discard))),
		})

		bun, err := support.Run(ctx, &support.Deps{
			Client: client,
			Log:    slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Named("bundle").Leveled(slog.LevelDebug),
		})
		var sdkErr *codersdk.Error
		require.NotNil(t, bun)
		require.ErrorAs(t, err, &sdkErr)
		require.Equal(t, http.StatusUnauthorized, sdkErr.StatusCode())
		// A bundle with logs is still expected alongside the error.
		require.NotEmpty(t, bun)
		require.NotEmpty(t, bun.Logs)
	})

	t.Run("MissingPrivilege", func(t *testing.T) {
		t.Parallel()
		ctx := testutil.Context(t, testutil.WaitLong)
		client := coderdtest.New(t, &coderdtest.Options{
			Logger: ptr.Ref(slog.Make(sloghuman.Sink(io.Discard))),
		})
		admin := coderdtest.CreateFirstUser(t, client)
		memberClient, _ := coderdtest.CreateAnotherUser(t, client, admin.OrganizationID)

		bun, err := support.Run(ctx, &support.Deps{
			Client: memberClient,
			Log:    testutil.Logger(t).Named("bundle"),
		})
		require.ErrorContains(t, err, "failed authorization check")
		// A bundle with logs is still expected alongside the error.
		require.NotEmpty(t, bun)
		require.NotEmpty(t, bun.Logs)
	})
}

// requireNotNilNotEmpty stops the test immediately when v is nil and otherwise
// records a non-fatal failure when v is empty. It produces the same failure
// messages as assertNotNilNotEmpty and is meant for values that are
// dereferenced right after the check, where continuing past a nil would panic.
func requireNotNilNotEmpty[T any](t *testing.T, v T, msg string) {
	t.Helper()
	require.NotNil(t, v, msg+" but was nil")
	assert.NotEmpty(t, v, msg+" but was empty")
}
// assertSanitizedDeploymentConfig checks that every option in dc carrying the
// "secret" annotation has a value that renders as an empty string. Options
// without that annotation are not inspected.
func assertSanitizedDeploymentConfig(t *testing.T, dc *codersdk.DeploymentConfig) {
	t.Helper()
	for _, option := range dc.Options {
		if !option.Annotations.IsSet("secret") {
			continue
		}
		assert.Empty(t, option.Value.String())
	}
}
// assertSanitizedWorkspace walks every agent of every resource in the
// workspace's latest build and checks that the agent's environment variables
// have been sanitized.
func assertSanitizedWorkspace(t *testing.T, ws codersdk.Workspace) {
	t.Helper()
	for _, resource := range ws.LatestBuild.Resources {
		agents := resource.Agents
		for i := range agents {
			assertSanitizedEnv(t, agents[i].EnvironmentVariables)
		}
	}
}
// assertSanitizedEnv checks that every value in env equals the redaction
// marker, reporting the name of each variable that does not. A nil or empty
// map passes trivially.
func assertSanitizedEnv(t *testing.T, env map[string]string) {
	t.Helper()
	const redacted = "***REDACTED***"
	for name, value := range env {
		assert.Equal(t, redacted, value, "environment variable %q not sanitized", name)
	}
}
// setupWorkspaceAndAgent prepares the fixtures used by the bundle tests:
//
//  1. uploads a minimal zip archive as the template file,
//  2. seeds a template version and a workspace build with one resource and
//     one agent, owned by user,
//  3. inserts one provisioner job log and one agent log,
//  4. starts a test agent whose log directory holds a "coder-agent.log" file,
//     then waits on a workspace agent waiter for that workspace.
//
// It returns the workspace as fetched through client (before the agent was
// started) together with the first agent of its first resource. Any failure
// aborts the calling test.
func setupWorkspaceAndAgent(ctx context.Context, t *testing.T, client *codersdk.Client, db database.Store, user codersdk.CreateFirstUserResponse) (codersdk.Workspace, codersdk.WorkspaceAgent) {
	t.Helper()

	// Smallest valid zip file: an empty archive is just the 22-byte "end of
	// central directory" record, i.e. the signature PK\x05\x06 followed by
	// zeroed fields.
	zipBytes := make([]byte, 22)
	copy(zipBytes, []byte{'P', 'K', 0x05, 0x06})
	uploadRes, err := client.Upload(ctx, codersdk.ContentTypeZip, bytes.NewReader(zipBytes))
	require.NoError(t, err)

	tv := dbfake.TemplateVersion(t, db).
		FileID(uploadRes.ID).
		Seed(database.TemplateVersion{
			OrganizationID: user.OrganizationID,
			CreatedBy:      user.UserID,
		}).
		Do()
	wbr := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
		OrganizationID: user.OrganizationID,
		OwnerID:        user.UserID,
		TemplateID:     tv.Template.ID,
	}).Resource().WithAgent().Do()

	ws, err := client.Workspace(ctx, wbr.Workspace.ID)
	require.NoError(t, err)
	// Fail with a clear message instead of an index-out-of-range panic if the
	// seeded build did not yield a resource with an agent.
	require.NotEmpty(t, ws.LatestBuild.Resources, "workspace build should have at least one resource")
	require.NotEmpty(t, ws.LatestBuild.Resources[0].Agents, "workspace resource should have at least one agent")
	agt := ws.LatestBuild.Resources[0].Agents[0]

	// Insert a provisioner job log.
	_, err = db.InsertProvisionerJobLogs(ctx, database.InsertProvisionerJobLogsParams{
		JobID:     wbr.Build.JobID,
		CreatedAt: []time.Time{dbtime.Now()},
		Source:    []database.LogSource{database.LogSourceProvisionerDaemon},
		Level:     []database.LogLevel{database.LogLevelInfo},
		Stage:     []string{"The World"},
		Output:    []string{"Players"},
	})
	require.NoError(t, err)

	// Insert an agent log.
	// NOTE(review): OutputLength 0o7 (= 7) does not match the length of the
	// output string; presumably a "007" pun. Confirm nothing relies on it.
	_, err = db.InsertWorkspaceAgentLogs(ctx, database.InsertWorkspaceAgentLogsParams{
		AgentID:      agt.ID,
		CreatedAt:    dbtime.Now(),
		Output:       []string{"Bond, James Bond"},
		Level:        []database.LogLevel{database.LogLevelInfo},
		LogSourceID:  wbr.Build.JobID,
		OutputLength: 0o7,
	})
	require.NoError(t, err)

	// Give the agent a log directory that already contains a log file.
	tempDir := t.TempDir()
	logPath := filepath.Join(tempDir, "coder-agent.log")
	require.NoError(t, os.WriteFile(logPath, []byte("hello from the agent"), 0o600))
	_ = agenttest.New(t, client.URL, wbr.AgentToken, func(o *agent.Options) {
		o.LogDir = tempDir
	})
	coderdtest.NewWorkspaceAgentWaiter(t, client, wbr.Workspace.ID).Wait()

	return ws, agt
}
// assertNotNilNotEmpty records a non-fatal failure when v is nil and, only if
// it is not nil, a non-fatal failure when v is empty. msg is used as the
// prefix of the failure message, suffixed with " but was nil" or
// " but was empty" respectively.
func assertNotNilNotEmpty[T any](t *testing.T, v T, msg string) {
	t.Helper()
	if !assert.NotNil(t, v, msg+" but was nil") {
		return
	}
	assert.NotEmpty(t, v, msg+" but was empty")
}