fix: run stop build before starting a workspace with a failed start (#22925)

This commit is contained in:
Jon Ayers
2026-03-18 14:58:20 -05:00
committed by GitHub
parent 147d627505
commit eba7d943a0
5 changed files with 146 additions and 14 deletions
+23
View File
@@ -79,6 +79,29 @@ func (r *RootCmd) start() *serpent.Command {
)
build = workspace.LatestBuild
default:
// If the last build was a failed start, run a stop
// first to clean up any partially-provisioned
// resources.
if workspace.LatestBuild.Status == codersdk.WorkspaceStatusFailed &&
workspace.LatestBuild.Transition == codersdk.WorkspaceTransitionStart {
_, _ = fmt.Fprintf(inv.Stdout, "The last start build failed. Cleaning up before retrying...\n")
stopBuild, stopErr := client.CreateWorkspaceBuild(inv.Context(), workspace.ID, codersdk.CreateWorkspaceBuildRequest{
Transition: codersdk.WorkspaceTransitionStop,
})
if stopErr != nil {
return xerrors.Errorf("cleanup stop after failed start: %w", stopErr)
}
stopErr = cliui.WorkspaceBuild(inv.Context(), inv.Stdout, client, stopBuild.ID)
if stopErr != nil {
return xerrors.Errorf("wait for cleanup stop: %w", stopErr)
}
// Re-fetch workspace after stop completes so
// startWorkspace sees the latest state.
workspace, err = namedWorkspace(inv.Context(), client, inv.Args[0])
if err != nil {
return err
}
}
build, err = startWorkspace(inv, client, workspace, parameterFlags, bflags, WorkspaceStart)
// It's possible for a workspace build to fail due to the template requiring starting
// workspaces with the active version.
+52
View File
@@ -534,3 +534,55 @@ func TestStart_WithReason(t *testing.T) {
workspace = coderdtest.MustWorkspace(t, member, workspace.ID)
require.Equal(t, codersdk.BuildReasonCLI, workspace.LatestBuild.Reason)
}
func TestStart_FailedStartCleansUp(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)
store, ps := dbtestutil.NewDB(t)
client := coderdtest.New(t, &coderdtest.Options{
Database: store,
Pubsub: ps,
IncludeProvisionerDaemon: true,
})
owner := coderdtest.CreateFirstUser(t, client)
memberClient, member := coderdtest.CreateAnotherUser(t, client, owner.OrganizationID)
version := coderdtest.CreateTemplateVersion(t, client, owner.OrganizationID, nil)
coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
template := coderdtest.CreateTemplate(t, client, owner.OrganizationID, version.ID)
workspace := coderdtest.CreateWorkspace(t, memberClient, template.ID)
coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID)
// Insert a failed start build directly into the database so that
// the workspace's latest build is a failed "start" transition.
dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{
ID: workspace.ID,
OwnerID: member.ID,
OrganizationID: owner.OrganizationID,
TemplateID: template.ID,
}).
Seed(database.WorkspaceBuild{
TemplateVersionID: version.ID,
Transition: database.WorkspaceTransitionStart,
BuildNumber: workspace.LatestBuild.BuildNumber + 1,
}).
Failed().
Do()
inv, root := clitest.New(t, "start", workspace.Name)
clitest.SetupConfig(t, memberClient, root)
pty := ptytest.New(t).Attach(inv)
doneChan := make(chan struct{})
go func() {
defer close(doneChan)
err := inv.Run()
assert.NoError(t, err)
}()
// The CLI should detect the failed start and clean up first.
pty.ExpectMatch("Cleaning up before retrying")
pty.ExpectMatch("workspace has been started")
_ = testutil.TryReceive(ctx, t, doneChan)
}
+29
View File
@@ -1483,6 +1483,35 @@ class ApiMethods {
await this.waitForBuild(startBuild);
};
/**
* Starts a workspace, but if the last build was a failed start,
* stops it first to give it a clean slate and the best chance
* of success.
*/
retryWorkspace = async (
workspace: TypesGen.Workspace,
templateVersionId: string,
logLevel?: TypesGen.ProvisionerLogLevel,
buildParameters?: TypesGen.WorkspaceBuildParameter[],
): Promise<TypesGen.WorkspaceBuild> => {
if (
workspace.latest_build.status === "failed" &&
workspace.latest_build.transition === "start"
) {
const stopBuild = await this.stopWorkspace(workspace.id, logLevel);
const awaitedStop = await this.waitForBuild(stopBuild);
if (awaitedStop?.status === "canceled") {
throw new Error("Cleanup stop was canceled");
}
}
return this.startWorkspace(
workspace.id,
templateVersionId,
logLevel,
buildParameters,
);
};
cancelTemplateVersionBuild = async (
templateVersionId: string,
): Promise<TypesGen.Response> => {
@@ -421,7 +421,9 @@ describe("WorkspacePage", () => {
const retryDebugButtonRe = /^Debug$/i;
describe("Retries a failed 'Start' transition", () => {
const mockStart = jest.spyOn(API, "startWorkspace");
const mockRetry = jest
.spyOn(API, "retryWorkspace")
.mockResolvedValue(MockWorkspaceBuild);
const failedStart: Workspace = {
...MockFailedWorkspace,
latest_build: {
@@ -431,10 +433,10 @@ describe("WorkspacePage", () => {
};
test("Retry with no debug", async () => {
await testButton(failedStart, retryButtonRe, mockStart);
await testButton(failedStart, retryButtonRe, mockRetry);
expect(mockStart).toBeCalledWith(
failedStart.id,
expect(mockRetry).toBeCalledWith(
failedStart,
failedStart.latest_build.template_version_id,
undefined,
undefined,
@@ -442,10 +444,10 @@ describe("WorkspacePage", () => {
});
test("Retry with debug logs", async () => {
await testButton(failedStart, retryDebugButtonRe, mockStart);
await testButton(failedStart, retryDebugButtonRe, mockRetry);
expect(mockStart).toBeCalledWith(
failedStart.id,
expect(mockRetry).toBeCalledWith(
failedStart,
failedStart.latest_build.template_version_id,
"debug",
undefined,
@@ -522,7 +524,9 @@ describe("WorkspacePage", () => {
return HttpResponse.json([parameter]);
}),
);
const startWorkspaceSpy = jest.spyOn(API, "startWorkspace");
const retryWorkspaceSpy = jest
.spyOn(API, "retryWorkspace")
.mockResolvedValue(MockWorkspaceBuild);
await renderWorkspacePage(workspace);
const retryWithBuildParametersButton = await screen.findByRole("button", {
@@ -539,8 +543,8 @@ describe("WorkspacePage", () => {
await user.click(submitButton);
await waitFor(() => {
expect(startWorkspaceSpy).toBeCalledWith(
workspace.id,
expect(retryWorkspaceSpy).toBeCalledWith(
workspace,
workspace.latest_build.template_version_id,
undefined,
[{ name: parameter.name, value: "some-value" }],
@@ -568,7 +572,9 @@ describe("WorkspacePage", () => {
return HttpResponse.json([parameter]);
}),
);
const startWorkspaceSpy = jest.spyOn(API, "startWorkspace");
const retryWorkspaceSpy = jest
.spyOn(API, "retryWorkspace")
.mockResolvedValue(MockWorkspaceBuild);
await renderWorkspacePage(workspace);
const retryWithBuildParametersButton = await screen.findByRole("button", {
@@ -585,8 +591,8 @@ describe("WorkspacePage", () => {
await user.click(submitButton);
await waitFor(() => {
expect(startWorkspaceSpy).toBeCalledWith(
workspace.id,
expect(retryWorkspaceSpy).toBeCalledWith(
workspace,
workspace.latest_build.template_version_id,
"debug",
[{ name: parameter.name, value: "some-value" }],
@@ -159,6 +159,28 @@ export const WorkspaceReadyPage: FC<WorkspaceReadyPageProps> = ({
},
});
// Retry workspace (stop-before-start for failed workspaces)
const retryWorkspaceMutation = useMutation({
...startWorkspace(workspace, queryClient),
mutationFn: ({
buildParameters,
logLevel,
}: {
buildParameters?: TypesGen.WorkspaceBuildParameter[];
logLevel?: TypesGen.ProvisionerLogLevel;
}) => {
return API.retryWorkspace(
workspace,
workspace.latest_build.template_version_id,
logLevel,
buildParameters,
);
},
onError: (error: unknown) => {
handleError(error);
},
});
// Toggle workspace favorite
const toggleFavoriteMutation = useMutation({
...toggleFavorite(workspace, queryClient),
@@ -246,7 +268,7 @@ export const WorkspaceReadyPage: FC<WorkspaceReadyPageProps> = ({
} else {
switch (workspace.latest_build.transition) {
case "start":
startWorkspaceMutation.mutate({
retryWorkspaceMutation.mutate({
logLevel,
buildParameters,
});