From eba7d943a0b7e95fef38ab9dc621c62620aec450 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 18 Mar 2026 14:58:20 -0500 Subject: [PATCH] fix: run stop build before starting a workspace with a failed start (#22925) --- cli/start.go | 23 ++++++++ cli/start_test.go | 52 +++++++++++++++++++ site/src/api/api.ts | 29 +++++++++++ .../WorkspacePage/WorkspacePage.jest.tsx | 32 +++++++----- .../WorkspacePage/WorkspaceReadyPage.tsx | 24 ++++++++- 5 files changed, 146 insertions(+), 14 deletions(-) diff --git a/cli/start.go b/cli/start.go index 7949f30871..43b0e75b1c 100644 --- a/cli/start.go +++ b/cli/start.go @@ -79,6 +79,29 @@ func (r *RootCmd) start() *serpent.Command { ) build = workspace.LatestBuild default: + // If the last build was a failed start, run a stop + // first to clean up any partially-provisioned + // resources. + if workspace.LatestBuild.Status == codersdk.WorkspaceStatusFailed && + workspace.LatestBuild.Transition == codersdk.WorkspaceTransitionStart { + _, _ = fmt.Fprintf(inv.Stdout, "The last start build failed. Cleaning up before retrying...\n") + stopBuild, stopErr := client.CreateWorkspaceBuild(inv.Context(), workspace.ID, codersdk.CreateWorkspaceBuildRequest{ + Transition: codersdk.WorkspaceTransitionStop, + }) + if stopErr != nil { + return xerrors.Errorf("cleanup stop after failed start: %w", stopErr) + } + stopErr = cliui.WorkspaceBuild(inv.Context(), inv.Stdout, client, stopBuild.ID) + if stopErr != nil { + return xerrors.Errorf("wait for cleanup stop: %w", stopErr) + } + // Re-fetch workspace after stop completes so + // startWorkspace sees the latest state. + workspace, err = namedWorkspace(inv.Context(), client, inv.Args[0]) + if err != nil { + return err + } + } build, err = startWorkspace(inv, client, workspace, parameterFlags, bflags, WorkspaceStart) // It's possible for a workspace build to fail due to the template requiring starting // workspaces with the active version. diff --git a/cli/start_test.go b/cli/start_test.go index 54cf419b38..7789ee8423 100644 --- a/cli/start_test.go +++ b/cli/start_test.go @@ -534,3 +534,55 @@ func TestStart_WithReason(t *testing.T) { workspace = coderdtest.MustWorkspace(t, member, workspace.ID) require.Equal(t, codersdk.BuildReasonCLI, workspace.LatestBuild.Reason) } + +func TestStart_FailedStartCleansUp(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitLong) + + store, ps := dbtestutil.NewDB(t) + client := coderdtest.New(t, &coderdtest.Options{ + Database: store, + Pubsub: ps, + IncludeProvisionerDaemon: true, + }) + owner := coderdtest.CreateFirstUser(t, client) + memberClient, member := coderdtest.CreateAnotherUser(t, client, owner.OrganizationID) + + version := coderdtest.CreateTemplateVersion(t, client, owner.OrganizationID, nil) + coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID) + template := coderdtest.CreateTemplate(t, client, owner.OrganizationID, version.ID) + workspace := coderdtest.CreateWorkspace(t, memberClient, template.ID) + coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID) + + // Insert a failed start build directly into the database so that + // the workspace's latest build is a failed "start" transition. + dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{ + ID: workspace.ID, + OwnerID: member.ID, + OrganizationID: owner.OrganizationID, + TemplateID: template.ID, + }). + Seed(database.WorkspaceBuild{ + TemplateVersionID: version.ID, + Transition: database.WorkspaceTransitionStart, + BuildNumber: workspace.LatestBuild.BuildNumber + 1, + }). + Failed(). + Do() + + inv, root := clitest.New(t, "start", workspace.Name) + clitest.SetupConfig(t, memberClient, root) + pty := ptytest.New(t).Attach(inv) + doneChan := make(chan struct{}) + go func() { + defer close(doneChan) + err := inv.Run() + assert.NoError(t, err) + }() + + // The CLI should detect the failed start and clean up first. + pty.ExpectMatch("Cleaning up before retrying") + pty.ExpectMatch("workspace has been started") + + _ = testutil.TryReceive(ctx, t, doneChan) +} diff --git a/site/src/api/api.ts b/site/src/api/api.ts index 65a346d1ad..ef78befd85 100644 --- a/site/src/api/api.ts +++ b/site/src/api/api.ts @@ -1483,6 +1483,35 @@ class ApiMethods { await this.waitForBuild(startBuild); }; + /** + * Starts a workspace, but if the last build was a failed start, + * stops it first to give it a clean slate and the best chance + * of success. + */ + retryWorkspace = async ( + workspace: TypesGen.Workspace, + templateVersionId: string, + logLevel?: TypesGen.ProvisionerLogLevel, + buildParameters?: TypesGen.WorkspaceBuildParameter[], + ): Promise => { + if ( + workspace.latest_build.status === "failed" && + workspace.latest_build.transition === "start" + ) { + const stopBuild = await this.stopWorkspace(workspace.id, logLevel); + const awaitedStop = await this.waitForBuild(stopBuild); + if (awaitedStop?.status === "canceled") { + throw new Error("Cleanup stop was canceled"); + } + } + return this.startWorkspace( + workspace.id, + templateVersionId, + logLevel, + buildParameters, + ); + }; + cancelTemplateVersionBuild = async ( templateVersionId: string, ): Promise => { diff --git a/site/src/pages/WorkspacePage/WorkspacePage.jest.tsx b/site/src/pages/WorkspacePage/WorkspacePage.jest.tsx index 594c625444..71bcf36f9f 100644 --- a/site/src/pages/WorkspacePage/WorkspacePage.jest.tsx +++ b/site/src/pages/WorkspacePage/WorkspacePage.jest.tsx @@ -421,7 +421,9 @@ describe("WorkspacePage", () => { const retryDebugButtonRe = /^Debug$/i; describe("Retries a failed 'Start' transition", () => { - const mockStart = jest.spyOn(API, "startWorkspace"); + const mockRetry = jest + .spyOn(API, "retryWorkspace") + .mockResolvedValue(MockWorkspaceBuild); const failedStart: Workspace = { ...MockFailedWorkspace, latest_build: { @@ -431,10 +433,10 @@ describe("WorkspacePage", () => { }; test("Retry with no debug", async () => { - await testButton(failedStart, retryButtonRe, mockStart); + await testButton(failedStart, retryButtonRe, mockRetry); - expect(mockStart).toBeCalledWith( - failedStart.id, + expect(mockRetry).toBeCalledWith( + failedStart, failedStart.latest_build.template_version_id, undefined, undefined, @@ -442,10 +444,10 @@ describe("WorkspacePage", () => { }); test("Retry with debug logs", async () => { - await testButton(failedStart, retryDebugButtonRe, mockStart); + await testButton(failedStart, retryDebugButtonRe, mockRetry); - expect(mockStart).toBeCalledWith( - failedStart.id, + expect(mockRetry).toBeCalledWith( + failedStart, failedStart.latest_build.template_version_id, "debug", undefined, @@ -522,7 +524,9 @@ describe("WorkspacePage", () => { return HttpResponse.json([parameter]); }), ); - const startWorkspaceSpy = jest.spyOn(API, "startWorkspace"); + const retryWorkspaceSpy = jest + .spyOn(API, "retryWorkspace") + .mockResolvedValue(MockWorkspaceBuild); await renderWorkspacePage(workspace); const retryWithBuildParametersButton = await screen.findByRole("button", { @@ -539,8 +543,8 @@ describe("WorkspacePage", () => { await user.click(submitButton); await waitFor(() => { - expect(startWorkspaceSpy).toBeCalledWith( - workspace.id, + expect(retryWorkspaceSpy).toBeCalledWith( + workspace, workspace.latest_build.template_version_id, undefined, [{ name: parameter.name, value: "some-value" }], @@ -568,7 +572,9 @@ describe("WorkspacePage", () => { return HttpResponse.json([parameter]); }), ); - const startWorkspaceSpy = jest.spyOn(API, "startWorkspace"); + const retryWorkspaceSpy = jest + .spyOn(API, "retryWorkspace") + .mockResolvedValue(MockWorkspaceBuild); await renderWorkspacePage(workspace); const retryWithBuildParametersButton = await screen.findByRole("button", { @@ -585,8 +591,8 @@ describe("WorkspacePage", () => { await user.click(submitButton); await waitFor(() => { - expect(startWorkspaceSpy).toBeCalledWith( - workspace.id, + expect(retryWorkspaceSpy).toBeCalledWith( + workspace, workspace.latest_build.template_version_id, "debug", [{ name: parameter.name, value: "some-value" }], diff --git a/site/src/pages/WorkspacePage/WorkspaceReadyPage.tsx b/site/src/pages/WorkspacePage/WorkspaceReadyPage.tsx index daa83eab32..4451a7f832 100644 --- a/site/src/pages/WorkspacePage/WorkspaceReadyPage.tsx +++ b/site/src/pages/WorkspacePage/WorkspaceReadyPage.tsx @@ -159,6 +159,28 @@ export const WorkspaceReadyPage: FC = ({ }, }); + // Retry workspace (stop-before-start for failed workspaces) + const retryWorkspaceMutation = useMutation({ + ...startWorkspace(workspace, queryClient), + mutationFn: ({ + buildParameters, + logLevel, + }: { + buildParameters?: TypesGen.WorkspaceBuildParameter[]; + logLevel?: TypesGen.ProvisionerLogLevel; + }) => { + return API.retryWorkspace( + workspace, + workspace.latest_build.template_version_id, + logLevel, + buildParameters, + ); + }, + onError: (error: unknown) => { + handleError(error); + }, + }); + // Toggle workspace favorite const toggleFavoriteMutation = useMutation({ ...toggleFavorite(workspace, queryClient), @@ -246,7 +268,7 @@ export const WorkspaceReadyPage: FC = ({ } else { switch (workspace.latest_build.transition) { case "start": - startWorkspaceMutation.mutate({ + retryWorkspaceMutation.mutate({ logLevel, buildParameters, });