mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
fix(scaletest/prebuilds): fix Runner.Cleanup() to delete workspaces (#23627)
This commit is contained in:
+121
-16
@@ -6,6 +6,7 @@ import (
|
||||
_ "embed"
|
||||
"html/template"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
@@ -17,6 +18,7 @@ import (
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
"github.com/coder/coder/v2/scaletest/harness"
|
||||
"github.com/coder/coder/v2/scaletest/loadtestutil"
|
||||
"github.com/coder/coder/v2/scaletest/workspacebuild"
|
||||
)
|
||||
|
||||
type Runner struct {
|
||||
@@ -77,6 +79,31 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error {
|
||||
}
|
||||
templ, err := r.client.CreateTemplate(ctx, r.cfg.OrganizationID, templateReq)
|
||||
if err != nil {
|
||||
// If the template already exists from a previous failed run, look it up so
|
||||
// Cleanup() can delete it and the rerun doesn't leave orphaned resources.
|
||||
var sdkErr *codersdk.Error
|
||||
if xerrors.As(err, &sdkErr) && sdkErr.StatusCode() == http.StatusConflict {
|
||||
existing, listErr := r.client.Templates(ctx, codersdk.TemplateFilter{
|
||||
OrganizationID: r.cfg.OrganizationID,
|
||||
ExactName: templateName,
|
||||
})
|
||||
if listErr == nil && len(existing) > 0 {
|
||||
r.template = existing[0]
|
||||
logger.Warn(ctx, "template already exists from a previous run, will be cleaned up",
|
||||
slog.F("template_name", r.template.Name),
|
||||
slog.F("template_id", r.template.ID),
|
||||
)
|
||||
// Clear any prebuild config on the orphaned template so the
|
||||
// reconciler doesn't keep spawning workspaces while Cleanup()
|
||||
// is trying to delete them.
|
||||
if clearErr := r.pushEmptyTemplateVersion(ctx); clearErr != nil {
|
||||
logger.Warn(ctx, "failed to clear prebuilds config on orphaned template",
|
||||
slog.F("template_id", r.template.ID),
|
||||
slog.Error(clearErr),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
r.cfg.Metrics.AddError(templateName, "create_template")
|
||||
return xerrors.Errorf("create template: %w", err)
|
||||
}
|
||||
@@ -105,21 +132,12 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error {
|
||||
r.cfg.DeletionSetupBarrier.Wait()
|
||||
logger.Info(ctx, "prebuilds paused, preparing for deletion")
|
||||
|
||||
// Now prepare for deletion by creating an empty template version
|
||||
// At this point, prebuilds should be paused by the caller
|
||||
// Now prepare for deletion by creating an empty template version.
|
||||
// At this point, prebuilds should be paused by the caller.
|
||||
logger.Info(ctx, "creating empty template version for deletion")
|
||||
emptyVersion, err := r.createTemplateVersion(ctx, r.template.ID, 0, 0)
|
||||
if err != nil {
|
||||
r.cfg.Metrics.AddError(r.template.Name, "create_empty_template_version")
|
||||
return xerrors.Errorf("create empty template version for deletion: %w", err)
|
||||
}
|
||||
|
||||
err = r.client.UpdateActiveTemplateVersion(ctx, r.template.ID, codersdk.UpdateActiveTemplateVersion{
|
||||
ID: emptyVersion.ID,
|
||||
})
|
||||
if err != nil {
|
||||
r.cfg.Metrics.AddError(r.template.Name, "update_active_template_version")
|
||||
return xerrors.Errorf("update active template version to empty for deletion: %w", err)
|
||||
if err = r.pushEmptyTemplateVersion(ctx); err != nil {
|
||||
r.cfg.Metrics.AddError(r.template.Name, "clear_template_prebuilds")
|
||||
return xerrors.Errorf("clear template prebuilds for deletion: %w", err)
|
||||
}
|
||||
|
||||
logger.Info(ctx, "waiting for all runners to reach deletion barrier")
|
||||
@@ -358,14 +376,79 @@ func (r *Runner) createTemplateVersion(ctx context.Context, templateID uuid.UUID
|
||||
|
||||
var errTickerDone = xerrors.New("done")
|
||||
|
||||
// pushEmptyTemplateVersion pushes a new empty template version (no presets, no
|
||||
// prebuilds) and makes it active. This stops the reconciler from spawning new
|
||||
// prebuild workspaces for the template.
|
||||
func (r *Runner) pushEmptyTemplateVersion(ctx context.Context) error {
|
||||
emptyVersion, err := r.createTemplateVersion(ctx, r.template.ID, 0, 0)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("create empty template version: %w", err)
|
||||
}
|
||||
if err = r.client.UpdateActiveTemplateVersion(ctx, r.template.ID, codersdk.UpdateActiveTemplateVersion{
|
||||
ID: emptyVersion.ID,
|
||||
}); err != nil {
|
||||
return xerrors.Errorf("update active template version: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *Runner) Cleanup(ctx context.Context, _ string, logs io.Writer) error {
|
||||
logs = loadtestutil.NewSyncWriter(logs)
|
||||
logger := slog.Make(sloghuman.Sink(logs)).Leveled(slog.LevelDebug)
|
||||
|
||||
logger.Info(ctx, "deleting template", slog.F("template_name", r.template.Name))
|
||||
// If Run failed before the template was created, there is nothing to clean up.
|
||||
if r.template.ID == uuid.Nil {
|
||||
logger.Info(ctx, "template was never created, skipping cleanup")
|
||||
return nil
|
||||
}
|
||||
|
||||
err := r.client.DeleteTemplate(ctx, r.template.ID)
|
||||
// Workspaces must be deleted before the template can be deleted.
|
||||
workspaces, err := allWorkspacesForTemplate(ctx, r.client, r.template.Name)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("list workspaces for template %q: %w", r.template.Name, err)
|
||||
}
|
||||
|
||||
logger.Info(ctx, "deleting workspaces for template", slog.F("count", len(workspaces)), slog.F("template_name", r.template.Name))
|
||||
|
||||
// Retry failed workspace deletions up to maxDeletionAttempts times to
|
||||
// handle transient errors (e.g. a delete build that fails due to a
|
||||
// provisioner hiccup).
|
||||
const maxDeletionAttempts = 3
|
||||
remaining := workspaces
|
||||
for attempt := range maxDeletionAttempts {
|
||||
if len(remaining) == 0 {
|
||||
break
|
||||
}
|
||||
logger.Info(ctx, "trying to delete workspaces",
|
||||
slog.F("attempt", attempt+1),
|
||||
slog.F("remaining", len(remaining)),
|
||||
slog.F("template_name", r.template.Name),
|
||||
)
|
||||
var failed []codersdk.Workspace
|
||||
for _, ws := range remaining {
|
||||
cr := workspacebuild.NewCleanupRunner(r.client, ws.ID)
|
||||
if err := cr.Run(ctx, ws.ID.String(), logs); err != nil {
|
||||
logger.Warn(ctx, "failed to delete workspace",
|
||||
slog.F("workspace_id", ws.ID),
|
||||
slog.F("workspace_name", ws.Name),
|
||||
slog.Error(err),
|
||||
)
|
||||
failed = append(failed, ws)
|
||||
}
|
||||
}
|
||||
remaining = failed
|
||||
}
|
||||
|
||||
if len(remaining) > 0 {
|
||||
ids := make([]string, len(remaining))
|
||||
for i, ws := range remaining {
|
||||
ids[i] = ws.ID.String()
|
||||
}
|
||||
return xerrors.Errorf("could not delete all workspaces after %d attempts; remaining: %v", maxDeletionAttempts, ids)
|
||||
}
|
||||
|
||||
logger.Info(ctx, "deleting template", slog.F("template_name", r.template.Name))
|
||||
if err := r.client.DeleteTemplate(ctx, r.template.ID); err != nil {
|
||||
return xerrors.Errorf("delete template: %w", err)
|
||||
}
|
||||
|
||||
@@ -373,6 +456,28 @@ func (r *Runner) Cleanup(ctx context.Context, _ string, logs io.Writer) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// allWorkspacesForTemplate returns all workspaces belonging to templateName,
|
||||
// paginating through results until exhausted.
|
||||
func allWorkspacesForTemplate(ctx context.Context, client *codersdk.Client, templateName string) ([]codersdk.Workspace, error) {
|
||||
const pageSize = 100
|
||||
var workspaces []codersdk.Workspace
|
||||
for page := 0; ; page++ {
|
||||
resp, err := client.Workspaces(ctx, codersdk.WorkspaceFilter{
|
||||
Template: templateName,
|
||||
Offset: page * pageSize,
|
||||
Limit: pageSize,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("list workspaces page %d: %w", page, err)
|
||||
}
|
||||
workspaces = append(workspaces, resp.Workspaces...)
|
||||
if len(resp.Workspaces) < pageSize {
|
||||
break
|
||||
}
|
||||
}
|
||||
return workspaces, nil
|
||||
}
|
||||
|
||||
//go:embed tf/main.tf.tpl
|
||||
var templateContent string
|
||||
|
||||
|
||||
Reference in New Issue
Block a user