diff --git a/coderd/database/db2sdk/db2sdk.go b/coderd/database/db2sdk/db2sdk.go index ee595f26cd..2d6906d2f4 100644 --- a/coderd/database/db2sdk/db2sdk.go +++ b/coderd/database/db2sdk/db2sdk.go @@ -538,6 +538,12 @@ func WorkspaceAgent(derpMap *tailcfg.DERPMap, coordinator tailnet.Coordinator, switch { case workspaceAgent.Status != codersdk.WorkspaceAgentConnected && workspaceAgent.LifecycleState == codersdk.WorkspaceAgentLifecycleOff: workspaceAgent.Health.Reason = "agent is not running" + case workspaceAgent.Status == codersdk.WorkspaceAgentConnecting: + // Note: the case above catches connecting+off as "not running". + // This case handles connecting agents with a non-off lifecycle + // (e.g. "created" or "starting"), where the agent binary has + // not yet established a connection to coderd. + workspaceAgent.Health.Reason = "agent has not yet connected" case workspaceAgent.Status == codersdk.WorkspaceAgentTimeout: workspaceAgent.Health.Reason = "agent is taking too long to connect" case workspaceAgent.Status == codersdk.WorkspaceAgentDisconnected: diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index cb2f9167f4..8fffb1f5b1 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -91,7 +91,7 @@ func TestWorkspaceAgent(t *testing.T) { require.Equal(t, tmpDir, workspace.LatestBuild.Resources[0].Agents[0].Directory) _, err = anotherClient.WorkspaceAgent(ctx, workspace.LatestBuild.Resources[0].Agents[0].ID) require.NoError(t, err) - require.True(t, workspace.LatestBuild.Resources[0].Agents[0].Health.Healthy) + require.False(t, workspace.LatestBuild.Resources[0].Agents[0].Health.Healthy) }) t.Run("HasFallbackTroubleshootingURL", func(t *testing.T) { t.Parallel() diff --git a/coderd/workspaces_test.go b/coderd/workspaces_test.go index 04c41f255f..7f6b2559d2 100644 --- a/coderd/workspaces_test.go +++ b/coderd/workspaces_test.go @@ -213,6 +213,39 @@ func TestWorkspace(t *testing.T) { t.Parallel() t.Run("Healthy", func(t *testing.T) { + t.Parallel() + client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) + user := coderdtest.CreateFirstUser(t, client) + authToken := uuid.NewString() + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionGraph: echo.ProvisionGraphWithAgent(authToken), + }) + coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID) + workspace := coderdtest.CreateWorkspace(t, client, template.ID) + coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID) + + _ = agenttest.New(t, client.URL, authToken) + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + var err error + testutil.Eventually(ctx, t, func(ctx context.Context) bool { + workspace, err = client.Workspace(ctx, workspace.ID) + return assert.NoError(t, err) && workspace.Health.Healthy + }, testutil.IntervalMedium) + + agent := workspace.LatestBuild.Resources[0].Agents[0] + + assert.True(t, workspace.Health.Healthy) + assert.Equal(t, []uuid.UUID{}, workspace.Health.FailingAgents) + assert.True(t, agent.Health.Healthy) + assert.Empty(t, agent.Health.Reason) + }) + + t.Run("Connecting", func(t *testing.T) { t.Parallel() client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) user := coderdtest.CreateFirstUser(t, client) @@ -247,10 +280,10 @@ func TestWorkspace(t *testing.T) { agent := workspace.LatestBuild.Resources[0].Agents[0] - assert.True(t, workspace.Health.Healthy) - assert.Equal(t, []uuid.UUID{}, workspace.Health.FailingAgents) - assert.True(t, agent.Health.Healthy) - assert.Empty(t, agent.Health.Reason) + assert.False(t, workspace.Health.Healthy) + assert.Equal(t, []uuid.UUID{agent.ID}, workspace.Health.FailingAgents) + assert.False(t, agent.Health.Healthy) + assert.Equal(t, "agent has not yet connected", agent.Health.Reason) }) t.Run("Unhealthy", func(t *testing.T) { @@ -302,6 +335,7 @@ func TestWorkspace(t *testing.T) { t.Parallel() client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) user := coderdtest.CreateFirstUser(t, client) + a1AuthToken := uuid.NewString() version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ Parse: echo.ParseComplete, ProvisionGraph: []*proto.Response{{ @@ -313,7 +347,9 @@ func TestWorkspace(t *testing.T) { Agents: []*proto.Agent{{ Id: uuid.NewString(), Name: "a1", - Auth: &proto.Agent_Token{}, + Auth: &proto.Agent_Token{ + Token: a1AuthToken, + }, }, { Id: uuid.NewString(), Name: "a2", @@ -330,13 +366,21 @@ func TestWorkspace(t *testing.T) { workspace := coderdtest.CreateWorkspace(t, client, template.ID) coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID) + _ = agenttest.New(t, client.URL, a1AuthToken) + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() var err error testutil.Eventually(ctx, t, func(ctx context.Context) bool { workspace, err = client.Workspace(ctx, workspace.ID) - return assert.NoError(t, err) && !workspace.Health.Healthy + if err != nil { + return false + } + // Wait for the mixed state: a1 connected (healthy) + // and workspace unhealthy (because a2 timed out). + agent1 := workspace.LatestBuild.Resources[0].Agents[0] + return agent1.Health.Healthy && !workspace.Health.Healthy }, testutil.IntervalMedium) assert.False(t, workspace.Health.Healthy) @@ -360,6 +404,7 @@ func TestWorkspace(t *testing.T) { // disconnected, but this should not make the workspace unhealthy. client, db := coderdtest.NewWithDatabase(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) user := coderdtest.CreateFirstUser(t, client) + authToken := uuid.NewString() version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ Parse: echo.ParseComplete, ProvisionGraph: []*proto.Response{{ @@ -371,7 +416,9 @@ func TestWorkspace(t *testing.T) { Agents: []*proto.Agent{{ Id: uuid.NewString(), Name: "parent", - Auth: &proto.Agent_Token{}, + Auth: &proto.Agent_Token{ + Token: authToken, + }, }}, }}, }, @@ -383,14 +430,23 @@ func TestWorkspace(t *testing.T) { workspace := coderdtest.CreateWorkspace(t, client, template.ID) coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID) + _ = agenttest.New(t, client.URL, authToken) + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() - // Get the workspace and parent agent. - workspace, err := client.Workspace(ctx, workspace.ID) - require.NoError(t, err) - parentAgent := workspace.LatestBuild.Resources[0].Agents[0] - require.True(t, parentAgent.Health.Healthy, "parent agent should be healthy initially") + // Wait for the parent agent to connect and be healthy. + var parentAgent codersdk.WorkspaceAgent + testutil.Eventually(ctx, t, func(ctx context.Context) bool { + var err error + workspace, err = client.Workspace(ctx, workspace.ID) + if err != nil { + return false + } + parentAgent = workspace.LatestBuild.Resources[0].Agents[0] + return parentAgent.Health.Healthy + }, testutil.IntervalMedium) + require.True(t, parentAgent.Health.Healthy, "parent agent should be healthy") // Create a sub-agent with a short connection timeout so it becomes // unhealthy quickly (simulating a devcontainer rebuild scenario). @@ -404,6 +460,7 @@ func TestWorkspace(t *testing.T) { // Wait for the sub-agent to become unhealthy due to timeout. var subAgentUnhealthy bool require.Eventually(t, func() bool { + var err error workspace, err = client.Workspace(ctx, workspace.ID) if err != nil { return false diff --git a/site/src/modules/workspaces/health.test.ts b/site/src/modules/workspaces/health.test.ts index ed8e2d61a2..357a6ccadb 100644 --- a/site/src/modules/workspaces/health.test.ts +++ b/site/src/modules/workspaces/health.test.ts @@ -152,14 +152,15 @@ describe("getAgentHealthIssue", () => { }); }); - it("returns connecting issue as default fallback", () => { + it("returns connecting issue for a connecting agent", () => { const ws = buildWorkspace( [{ status: "connecting", lifecycle_state: "starting" }], 1, ); expect(getAgentHealthIssue(ws)).toEqual({ - title: "Workspace agent is still connecting", - detail: "Check the log output if the connection does not complete.", + title: "Workspace agent is connecting", + detail: + "The workspace agent has not connected yet. Wait for it to connect or check the logs if it does not.", severity: "info", prominent: false, }); @@ -227,7 +228,7 @@ describe("getAgentHealthIssue", () => { 2, ); const result = getAgentHealthIssue(ws); - expect(result.title).toBe("2 workspace agents are still connecting"); + expect(result.title).toBe("2 workspace agents are connecting"); }); it("uses singular title when only one agent is failing", () => { @@ -329,7 +330,7 @@ describe("getAgentHealthIssue", () => { 1, ); const result = getAgentHealthIssue(ws); - expect(result.title).toBe("Workspace agent is still connecting"); + expect(result.title).toBe("Workspace agent is connecting"); expect(result.severity).toBe("info"); }); diff --git a/site/src/modules/workspaces/health.ts b/site/src/modules/workspaces/health.ts index cebe53a9b8..88d8ff43b7 100644 --- a/site/src/modules/workspaces/health.ts +++ b/site/src/modules/workspaces/health.ts @@ -34,6 +34,11 @@ export const agentScriptMessages = { * process connecting to the Coder control plane). */ export const agentConnectionMessages = { + connecting: { + title: "Workspace agent is connecting", + detail: + "The workspace agent has not connected yet. Wait for it to connect or check the logs if it does not.", + }, timeout: { title: "Agent is taking longer than expected to connect", detail: @@ -155,6 +160,17 @@ export function getAgentHealthIssue(workspace: Workspace): AgentHealthIssue { }; } + if (statusSet.has("connecting")) { + return { + title: plural + ? `${failingAgentCount} workspace agents are connecting` + : agentConnectionMessages.connecting.title, + detail: agentConnectionMessages.connecting.detail, + severity: "info", + prominent: false, + }; + } + return { title: plural ? `${failingAgentCount} workspace agents are still connecting`