chore(cli): seed healthcheck cache in TestSupportBundle (#21436)

Fixes https://github.com/coder/internal/issues/272

This test periodically fails because the healthcheck times out.
The problem is compounded by the fact that we stand up a new
`coderdtest` instance for each subtest.

This PR does the following:
* Updates the subtests to share a single `coderdtest` instance.
* Hits the `/debug/health` endpoint before completing the setup phase so
that the result is cached.

This will not completely remove the issue, as the healthcheck could
still fail due to test-infrastructure-related issues. In that case we
may decide to add a retry to the step that seeds the healthcheck cache.
This commit is contained in:
Cian Johnston
2026-01-07 08:47:31 +00:00
committed by GitHub
parent c3e3249a2a
commit 6bd2d1c85f
+100 -78
View File
@@ -3,6 +3,7 @@ package cli_test
import (
"archive/zip"
"bytes"
"context"
"encoding/json"
"io"
"net/http"
@@ -25,6 +26,7 @@ import (
"github.com/coder/coder/v2/cli/clitest"
"github.com/coder/coder/v2/coderd/coderdtest"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/dbfake"
"github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/coderd/healthcheck/derphealth"
@@ -43,81 +45,69 @@ func TestSupportBundle(t *testing.T) {
t.Skip("for some reason, windows fails to remove tempdirs sometimes")
}
t.Run("Workspace", func(t *testing.T) {
// Support bundle tests can share a single coderdtest instance.
var dc codersdk.DeploymentConfig
secretValue := uuid.NewString()
seedSecretDeploymentOptions(t, &dc, secretValue)
client, closer, api := coderdtest.NewWithAPI(t, &coderdtest.Options{
DeploymentValues: dc.Values,
HealthcheckTimeout: testutil.WaitSuperLong,
})
t.Cleanup(func() { closer.Close() })
owner := coderdtest.CreateFirstUser(t, client)
memberClient, member := coderdtest.CreateAnotherUser(t, client, owner.OrganizationID)
// Set up test fixtures
setupCtx := testutil.Context(t, testutil.WaitSuperLong)
workspaceWithAgent := setupSupportBundleTestFixture(setupCtx, t, api.Database, owner.OrganizationID, owner.UserID, func(agents []*proto.Agent) []*proto.Agent {
// This should not show up in the bundle output
agents[0].Env["SECRET_VALUE"] = secretValue
return agents
})
workspaceWithoutAgent := setupSupportBundleTestFixture(setupCtx, t, api.Database, owner.OrganizationID, owner.UserID, nil)
memberWorkspace := setupSupportBundleTestFixture(setupCtx, t, api.Database, owner.OrganizationID, member.ID, nil)
// Wait for healthcheck to complete successfully before continuing with sub-tests.
// The result is cached so subsequent requests will be fast.
healthcheckDone := make(chan *healthsdk.HealthcheckReport)
go func() {
defer close(healthcheckDone)
hc, err := healthsdk.New(client).DebugHealth(setupCtx)
if err != nil {
assert.NoError(t, err, "seed healthcheck cache")
return
}
healthcheckDone <- &hc
}()
if _, ok := testutil.AssertReceive(setupCtx, t, healthcheckDone); !ok {
t.Fatal("healthcheck did not complete in time -- this may be a transient issue")
}
t.Run("WorkspaceWithAgent", func(t *testing.T) {
t.Parallel()
var dc codersdk.DeploymentConfig
secretValue := uuid.NewString()
seedSecretDeploymentOptions(t, &dc, secretValue)
client, db := coderdtest.NewWithDatabase(t, &coderdtest.Options{
DeploymentValues: dc.Values,
HealthcheckTimeout: testutil.WaitSuperLong,
})
owner := coderdtest.CreateFirstUser(t, client)
r := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OrganizationID: owner.OrganizationID,
OwnerID: owner.UserID,
}).WithAgent(func(agents []*proto.Agent) []*proto.Agent {
// This should not show up in the bundle output
agents[0].Env["SECRET_VALUE"] = secretValue
return agents
}).Do()
ctx := testutil.Context(t, testutil.WaitShort)
ws, err := client.Workspace(ctx, r.Workspace.ID)
require.NoError(t, err)
tempDir := t.TempDir()
logPath := filepath.Join(tempDir, "coder-agent.log")
require.NoError(t, os.WriteFile(logPath, []byte("hello from the agent"), 0o600))
agt := agenttest.New(t, client.URL, r.AgentToken, func(o *agent.Options) {
agt := agenttest.New(t, client.URL, workspaceWithAgent.AgentToken, func(o *agent.Options) {
o.LogDir = tempDir
})
defer agt.Close()
coderdtest.NewWorkspaceAgentWaiter(t, client, r.Workspace.ID).Wait()
ctx = testutil.Context(t, testutil.WaitShort) // Reset timeout after waiting for agent.
// Insert a provisioner job log
_, err = db.InsertProvisionerJobLogs(ctx, database.InsertProvisionerJobLogsParams{
JobID: r.Build.JobID,
CreatedAt: []time.Time{dbtime.Now()},
Source: []database.LogSource{database.LogSourceProvisionerDaemon},
Level: []database.LogLevel{database.LogLevelInfo},
Stage: []string{"provision"},
Output: []string{"done"},
})
require.NoError(t, err)
// Insert an agent log
_, err = db.InsertWorkspaceAgentLogs(ctx, database.InsertWorkspaceAgentLogsParams{
AgentID: ws.LatestBuild.Resources[0].Agents[0].ID,
CreatedAt: dbtime.Now(),
Output: []string{"started up"},
Level: []database.LogLevel{database.LogLevelInfo},
LogSourceID: r.Build.JobID,
OutputLength: 10,
})
require.NoError(t, err)
coderdtest.NewWorkspaceAgentWaiter(t, client, workspaceWithAgent.Workspace.ID).Wait()
d := t.TempDir()
path := filepath.Join(d, "bundle.zip")
inv, root := clitest.New(t, "support", "bundle", r.Workspace.Name, "--output-file", path, "--yes")
inv, root := clitest.New(t, "support", "bundle", workspaceWithAgent.Workspace.Name, "--output-file", path, "--yes")
//nolint: gocritic // requires owner privilege
clitest.SetupConfig(t, client, root)
err = inv.Run()
err := inv.Run()
require.NoError(t, err)
assertBundleContents(t, path, true, true, []string{secretValue})
})
t.Run("NoWorkspace", func(t *testing.T) {
t.Parallel()
var dc codersdk.DeploymentConfig
secretValue := uuid.NewString()
seedSecretDeploymentOptions(t, &dc, secretValue)
client := coderdtest.New(t, &coderdtest.Options{
DeploymentValues: dc.Values,
HealthcheckTimeout: testutil.WaitSuperLong,
})
_ = coderdtest.CreateFirstUser(t, client)
d := t.TempDir()
path := filepath.Join(d, "bundle.zip")
@@ -131,21 +121,9 @@ func TestSupportBundle(t *testing.T) {
t.Run("NoAgent", func(t *testing.T) {
t.Parallel()
var dc codersdk.DeploymentConfig
secretValue := uuid.NewString()
seedSecretDeploymentOptions(t, &dc, secretValue)
client, db := coderdtest.NewWithDatabase(t, &coderdtest.Options{
DeploymentValues: dc.Values,
HealthcheckTimeout: testutil.WaitSuperLong,
})
admin := coderdtest.CreateFirstUser(t, client)
r := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OrganizationID: admin.OrganizationID,
OwnerID: admin.UserID,
}).Do() // without agent!
d := t.TempDir()
path := filepath.Join(d, "bundle.zip")
inv, root := clitest.New(t, "support", "bundle", r.Workspace.Name, "--output-file", path, "--yes")
inv, root := clitest.New(t, "support", "bundle", workspaceWithoutAgent.Workspace.Name, "--output-file", path, "--yes")
//nolint: gocritic // requires owner privilege
clitest.SetupConfig(t, client, root)
err := inv.Run()
@@ -155,14 +133,7 @@ func TestSupportBundle(t *testing.T) {
t.Run("NoPrivilege", func(t *testing.T) {
t.Parallel()
client, db := coderdtest.NewWithDatabase(t, nil)
user := coderdtest.CreateFirstUser(t, client)
memberClient, member := coderdtest.CreateAnotherUser(t, client, user.OrganizationID)
r := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
OrganizationID: user.OrganizationID,
OwnerID: member.ID,
}).WithAgent().Do()
inv, root := clitest.New(t, "support", "bundle", r.Workspace.Name, "--yes")
inv, root := clitest.New(t, "support", "bundle", memberWorkspace.Workspace.Name, "--yes")
clitest.SetupConfig(t, memberClient, root)
err := inv.Run()
require.ErrorContains(t, err, "failed authorization check")
@@ -269,7 +240,7 @@ func assertBundleContents(t *testing.T, path string, wantWorkspace bool, wantAge
require.NotEmpty(t, v, "workspace should not be empty")
case "workspace/build_logs.txt":
bs := readBytesFromZip(t, f)
if !wantWorkspace || !wantAgent {
if !wantWorkspace {
require.Empty(t, bs, "expected workspace build logs to be empty")
continue
}
@@ -433,3 +404,54 @@ func seedSecretDeploymentOptions(t *testing.T, dc *codersdk.DeploymentConfig, se
}
}
}
// setupSupportBundleTestFixture seeds db with a workspace build owned by
// ownerID in organization orgID and inserts one provisioner job log for the
// build's job.
//
// When withAgent is non-nil it is passed to the builder's WithAgent, and an
// agent log is additionally inserted for every agent found on the resources
// of the build's job. When withAgent is nil, WithAgent is not called and no
// agent log is inserted.
//
// The direct database queries run with a system-restricted dbauthz context.
// Any query error fails the test via require. The dbfake response for the
// created build is returned.
func setupSupportBundleTestFixture(
	ctx context.Context,
	t testing.TB,
	db database.Store,
	orgID, ownerID uuid.UUID,
	withAgent func([]*proto.Agent) []*proto.Agent,
) dbfake.WorkspaceResponse {
	t.Helper()
	// nolint: gocritic // Used for seeding test data only.
	ctx = dbauthz.AsSystemRestricted(ctx)

	builder := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{
		OrganizationID: orgID,
		OwnerID:        ownerID,
	})
	if withAgent != nil {
		builder = builder.WithAgent(withAgent)
	}
	resp := builder.Do()

	// Every fixture gets a provisioner job log attached to its build job.
	jobLogParams := database.InsertProvisionerJobLogsParams{
		JobID:     resp.Build.JobID,
		CreatedAt: []time.Time{dbtime.Now()},
		Source:    []database.LogSource{database.LogSourceProvisionerDaemon},
		Level:     []database.LogLevel{database.LogLevelInfo},
		Stage:     []string{"provision"},
		Output:    []string{"done"},
	}
	_, err := db.InsertProvisionerJobLogs(ctx, jobLogParams)
	require.NoError(t, err)

	// Agent logs are only seeded when an agent was requested.
	if withAgent == nil {
		return resp
	}

	resources, err := db.GetWorkspaceResourcesByJobID(ctx, resp.Build.JobID)
	require.NoError(t, err)
	var resourceIDs []uuid.UUID
	for i := range resources {
		resourceIDs = append(resourceIDs, resources[i].ID)
	}

	wsAgents, err := db.GetWorkspaceAgentsByResourceIDs(ctx, resourceIDs)
	require.NoError(t, err)
	for i := range wsAgents {
		_, err = db.InsertWorkspaceAgentLogs(ctx, database.InsertWorkspaceAgentLogsParams{
			AgentID:      wsAgents[i].ID,
			CreatedAt:    dbtime.Now(),
			Output:       []string{"started up"},
			Level:        []database.LogLevel{database.LogLevelInfo},
			LogSourceID:  resp.Build.JobID,
			OutputLength: 10,
		})
		require.NoError(t, err)
	}
	return resp
}