feat: add cli command scaletest dynamic-parameters (#20034)

part of https://github.com/coder/internal/issues/912

Adds CLI command `coder exp scaletest dynamic-parameters`

I've left out the configuration of tracing and timeouts for now. I think I want to do some refactoring of the scaletest CLI so that handling those flags takes up less boilerplate.

I will add tracing and timeout flags in a follow-up PR.
This commit is contained in:
Spike Curtis
2025-10-07 21:53:59 +04:00
committed by GitHub
parent 0e0f0925e4
commit 65335bc7d4
8 changed files with 700 additions and 7 deletions
+1
View File
@@ -60,6 +60,7 @@ func (r *RootCmd) scaletestCmd() *serpent.Command {
Children: []*serpent.Command{
r.scaletestCleanup(),
r.scaletestDashboard(),
r.scaletestDynamicParameters(),
r.scaletestCreateWorkspaces(),
r.scaletestWorkspaceUpdates(),
r.scaletestWorkspaceTraffic(),
+110
View File
@@ -0,0 +1,110 @@
//go:build !slim
package cli
import (
"fmt"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/xerrors"
"cdr.dev/slog"
"cdr.dev/slog/sloggers/sloghuman"
"github.com/coder/coder/v2/scaletest/dynamicparameters"
"github.com/coder/coder/v2/scaletest/harness"
"github.com/coder/serpent"
)
// dynamicParametersTestName is the test name under which every dynamic
// parameters runner is registered with the test harness (see th.AddRun in
// scaletestDynamicParameters).
const (
dynamicParametersTestName = "dynamic-parameters"
)
// scaletestDynamicParameters builds the `dynamic-parameters` scaletest
// subcommand.
//
// The handler sets up one template version per partition (see
// dynamicparameters.SetupPartitions), registers one runner per concurrent
// evaluation in each partition with a concurrent test harness, runs the
// harness, and finally writes the results to every configured output.
//
// Flags:
//   - --template: name of the template to use (created if missing).
//   - --concurrent-evaluations: total number of concurrent evaluations; must be
//     greater than zero.
//
// The organization and output flags are attached by orgContext and output.
func (r *RootCmd) scaletestDynamicParameters() *serpent.Command {
	var (
		templateName string
		numEvals     int64
	)
	orgContext := NewOrganizationContext()
	output := &scaletestOutputFlags{}

	cmd := &serpent.Command{
		Use:   "dynamic-parameters",
		Short: "Generates load on the Coder server evaluating dynamic parameters",
		Long:  `It is recommended that all rate limits are disabled on the server before running this scaletest. This test generates many login events which will be rate limited against the (most likely single) IP.`,
		Handler: func(inv *serpent.Invocation) error {
			ctx := inv.Context()

			outputs, err := output.parse()
			if err != nil {
				// Keep the underlying cause so the user can see which flag was bad.
				return xerrors.Errorf("could not parse --output flags: %w", err)
			}

			// Validate purely local flag values before talking to the server.
			if templateName == "" {
				return xerrors.Errorf("template cannot be empty")
			}
			if numEvals <= 0 {
				return xerrors.Errorf("--concurrent-evaluations must be greater than 0, got %d", numEvals)
			}

			client, err := r.InitClient(inv)
			if err != nil {
				return err
			}

			org, err := orgContext.Selected(inv, client)
			if err != nil {
				return err
			}

			logger := slog.Make(sloghuman.Sink(inv.Stdout)).Leveled(slog.LevelDebug)
			partitions, err := dynamicparameters.SetupPartitions(ctx, client, org.ID, templateName, numEvals, logger)
			if err != nil {
				return xerrors.Errorf("setup dynamic parameters partitions: %w", err)
			}

			th := harness.NewTestHarness(harness.ConcurrentExecutionStrategy{}, harness.ConcurrentExecutionStrategy{})
			reg := prometheus.NewRegistry()
			metrics := dynamicparameters.NewMetrics(reg, "concurrent_evaluations")

			for i, part := range partitions {
				for j := range part.ConcurrentEvaluations {
					cfg := dynamicparameters.Config{
						TemplateVersion: part.TemplateVersion.ID,
						Metrics:         metrics,
						// The single label value is the partition size, matching the
						// "concurrent_evaluations" label name passed to NewMetrics.
						MetricLabelValues: []string{fmt.Sprintf("%d", part.ConcurrentEvaluations)},
					}
					runner := dynamicparameters.NewRunner(client, cfg)
					// Run ID is "<evaluation index>/<partition index>".
					th.AddRun(dynamicParametersTestName, fmt.Sprintf("%d/%d", j, i), runner)
				}
			}

			err = th.Run(ctx)
			if err != nil {
				return xerrors.Errorf("run test harness: %w", err)
			}

			res := th.Results()
			for _, o := range outputs {
				err = o.write(res, inv.Stdout)
				if err != nil {
					return xerrors.Errorf("write output %q to %q: %w", o.format, o.path, err)
				}
			}

			return nil
		},
	}

	cmd.Options = serpent.OptionSet{
		{
			Flag:        "template",
			Description: "Name of the template to use. If it does not exist, it will be created.",
			Default:     "scaletest-dynamic-parameters",
			Value:       serpent.StringOf(&templateName),
		},
		{
			Flag:        "concurrent-evaluations",
			Description: "Number of concurrent dynamic parameter evaluations to perform.",
			Default:     "100",
			Value:       serpent.Int64Of(&numEvals),
		},
	}
	orgContext.AttachOptions(cmd)
	output.attach(&cmd.Options)
	return cmd
}
+10
View File
@@ -519,6 +519,16 @@ func (e *Error) Error() string {
return builder.String()
}
// NewTestError constructs an Error whose unexported status code, method, and
// URL fields are populated from the arguments. It's generally only useful for
// testing.
func NewTestError(statusCode int, method string, u string) *Error {
	e := new(Error)
	e.statusCode = statusCode
	e.method = method
	e.url = u
	return e
}
type closeFunc func() error
func (c closeFunc) Close() error {
-1
View File
@@ -4,7 +4,6 @@ import "github.com/google/uuid"
type Config struct {
TemplateVersion uuid.UUID `json:"template_version"`
SessionToken string `json:"session_token"`
Metrics *Metrics `json:"-"`
MetricLabelValues []string `json:"metric_label_values"`
}
+1 -5
View File
@@ -22,12 +22,8 @@ type Runner struct {
var _ harness.Runnable = &Runner{}
func NewRunner(client *codersdk.Client, cfg Config) *Runner {
clone := codersdk.New(client.URL)
clone.HTTPClient = client.HTTPClient
clone.SetLogger(client.Logger())
clone.SetSessionToken(cfg.SessionToken)
return &Runner{
client: clone,
client: client,
cfg: cfg,
}
}
-1
View File
@@ -37,7 +37,6 @@ func TestRun(t *testing.T) {
reg := prometheus.NewRegistry()
cfg := dynamicparameters.Config{
TemplateVersion: version.ID,
SessionToken: userClient.SessionToken(),
Metrics: dynamicparameters.NewMetrics(reg, "template", "test_label_name"),
MetricLabelValues: []string{template.Name, "test_label_value"},
}
+285
View File
@@ -1,14 +1,30 @@
package dynamicparameters
import (
"archive/tar"
"bytes"
"context"
_ "embed"
"encoding/json"
"fmt"
"io"
"path/filepath"
"slices"
"strings"
"text/template"
"time"
"github.com/google/uuid"
"golang.org/x/xerrors"
"cdr.dev/slog"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/cryptorand"
"github.com/coder/quartz"
)
var ErrNoProvisionersMatched = xerrors.New("no provisioners matched")
//go:embed tf/main.tf
var templateContent string
@@ -72,3 +88,272 @@ func GetModuleFiles() map[string][]byte {
".terraform/modules/modules.json": modulesJSONBytes,
}
}
// createTarFromFiles builds an in-memory tar archive from files, a map of
// archive path to file content.
//
// Entries are written in sorted path order so the same input always produces
// the same bytes. Before a file is written, a directory header is emitted for
// each of its ancestor directories (shallowest first) that has not been
// emitted yet. Directory names always use forward slashes, as tar requires,
// regardless of the host OS.
//
// It returns the archive bytes, or the first error reported by the tar writer.
func createTarFromFiles(files map[string][]byte) ([]byte, error) {
	buf := new(bytes.Buffer)
	writer := tar.NewWriter(buf)

	// Map iteration order is random; sort the names for reproducible output.
	names := make([]string, 0, len(files))
	for name := range files {
		names = append(names, name)
	}
	slices.Sort(names)

	writtenDirs := make(map[string]struct{})
	for _, name := range names {
		// Collect every ancestor directory, deepest first.
		var ancestors []string
		dir := filepath.ToSlash(filepath.Dir(name))
		for dir != "." {
			ancestors = append(ancestors, dir)
			parent := filepath.ToSlash(filepath.Dir(dir))
			if parent == dir {
				// Reached a root such as "/"; Dir is a fixpoint from here on.
				break
			}
			dir = parent
		}

		// We need to add directories before any files that use them. But, we
		// only need to do this once per directory.
		for i := len(ancestors) - 1; i >= 0; i-- {
			d := ancestors[i]
			if _, ok := writtenDirs[d]; ok {
				continue
			}
			writtenDirs[d] = struct{}{}
			err := writer.WriteHeader(&tar.Header{
				Name:     d,
				Mode:     0o755,
				Typeflag: tar.TypeDir,
			})
			if err != nil {
				return nil, err
			}
		}

		content := files[name]
		err := writer.WriteHeader(&tar.Header{
			Name: name,
			Size: int64(len(content)),
			Mode: 0o644,
		})
		if err != nil {
			return nil, err
		}
		_, err = writer.Write(content)
		if err != nil {
			return nil, err
		}
	}

	// `writer.Close()` flushes the writer buffer and adds the trailing padding
	// required to create a legal tarball.
	err := writer.Close()
	if err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
// TemplateTarData renders main.tf via TemplateContent, combines it with the
// files returned by GetModuleFiles, and packs everything into a tar archive.
// It returns the archive bytes, or a wrapped error if rendering or archiving
// fails.
func TemplateTarData() ([]byte, error) {
	rendered, err := TemplateContent()
	if err != nil {
		return nil, xerrors.Errorf("failed to generate main.tf: %w", err)
	}

	modules := GetModuleFiles()
	archiveFiles := make(map[string][]byte, len(modules)+1)
	archiveFiles["main.tf"] = []byte(rendered)
	// Module files are copied after main.tf, so a module entry with the same
	// key would take precedence.
	for modPath, modContent := range modules {
		archiveFiles[modPath] = modContent
	}

	archive, err := createTarFromFiles(archiveFiles)
	if err != nil {
		return nil, xerrors.Errorf("failed to create tarball: %w", err)
	}
	return archive, nil
}
// Partition pairs a template version with the number of concurrent evaluations
// assigned to it by the partitioner.
type Partition struct {
// TemplateVersion is the template version created for this partition.
TemplateVersion codersdk.TemplateVersion
// ConcurrentEvaluations is this partition's share of the total number of
// evaluations (one element of the partitionEvaluations result).
ConcurrentEvaluations int
}
// SDKForDynamicParametersSetup is the subset of client methods the partitioner
// calls while setting up templates and template versions. SetupPartitions
// accepts it instead of a concrete client, which lets tests substitute a fake.
type SDKForDynamicParametersSetup interface {
// TemplateByName looks up a template by name within an organization.
TemplateByName(ctx context.Context, orgID uuid.UUID, templateName string) (codersdk.Template, error)
// CreateTemplate creates a template in the organization.
CreateTemplate(ctx context.Context, orgID uuid.UUID, createReq codersdk.CreateTemplateRequest) (codersdk.Template, error)
// CreateTemplateVersion creates a template version in the organization.
CreateTemplateVersion(ctx context.Context, orgID uuid.UUID, createReq codersdk.CreateTemplateVersionRequest) (codersdk.TemplateVersion, error)
// Upload uploads the reader's content with the given content type.
Upload(ctx context.Context, contentType string, reader io.Reader) (codersdk.UploadResponse, error)
// TemplateVersion fetches a template version by ID; it is used to poll the
// version's provisioner job status.
TemplateVersion(ctx context.Context, versionID uuid.UUID) (codersdk.TemplateVersion, error)
}
// partitioner is an internal struct that holds the context and arguments for
// partition setup and provides methods for all sub-steps. It is constructed by
// SetupPartitions (and directly by tests, which inject a mock clock).
type partitioner struct {
// ctx is passed to every client call and to the polling ticker.
ctx context.Context
// client performs the template, template version, and upload API calls.
client SDKForDynamicParametersSetup
// orgID is the organization in which the template is looked up or created.
orgID uuid.UUID
// templateName is the template to look up, or to create if it is not found.
templateName string
// numEvals is the total number of evaluations to split across partitions.
numEvals int64
logger slog.Logger
// clock drives the job-status polling ticker; it is a field so tests can
// substitute a mock clock.
clock quartz.Clock
}
// SetupPartitions prepares the template versions needed for a dynamic
// parameters scaletest and returns one Partition per version.
//
// It wraps the arguments in a partitioner backed by the real clock and runs
// it; see partitioner.run for the individual steps.
func SetupPartitions(
	ctx context.Context, client SDKForDynamicParametersSetup,
	orgID uuid.UUID, templateName string, numEvals int64,
	logger slog.Logger,
) ([]Partition, error) {
	return (&partitioner{
		ctx:          ctx,
		client:       client,
		orgID:        orgID,
		templateName: templateName,
		numEvals:     numEvals,
		logger:       logger,
		clock:        quartz.NewReal(),
	}).run()
}
// run performs the full partition setup:
//
//  1. Rejects a non-positive numEvals before touching the server.
//  2. Looks up the template by name; on a 404 it creates a first template
//     version and then the template itself from that version.
//  3. Splits numEvals into partitions (see partitionEvaluations).
//  4. Ensures every partition has its own template version, reusing the version
//     created in step 2 for the first partition when there is one.
//  5. Waits for all template version provisioner jobs to succeed.
//
// It returns the partitions in the same order as the partition sizes, or a
// wrapped error from the first step that fails.
func (p *partitioner) run() ([]Partition, error) {
	// With zero partitions there is nothing to set up, and the evalParts[0]
	// access below would panic if we had just created the template. Fail early,
	// before anything is created on the server.
	if p.numEvals <= 0 {
		return nil, xerrors.Errorf("number of concurrent evaluations must be positive, got %d", p.numEvals)
	}

	var (
		err         error
		coderError  *codersdk.Error
		templ       codersdk.Template
		tempVersion codersdk.TemplateVersion
	)
	templ, err = p.client.TemplateByName(p.ctx, p.orgID, p.templateName)
	if xerrors.As(err, &coderError) && coderError.StatusCode() == 404 {
		// The template does not exist yet. A template must be created from an
		// existing version, so create the version first (with no template ID).
		tempVersion, err = p.createTemplateVersion(uuid.Nil)
		if err != nil {
			return nil, xerrors.Errorf("failed to create template version: %w", err)
		}
		p.logger.Info(p.ctx, "created template version", slog.F("version_id", tempVersion.ID))

		createReq := codersdk.CreateTemplateRequest{
			Name:        p.templateName,
			DisplayName: "Scaletest Dynamic Parameters",
			Description: "`coder exp scaletest dynamic parameters test` template",
			VersionID:   tempVersion.ID,
		}
		templ, err = p.client.CreateTemplate(p.ctx, p.orgID, createReq)
		if err != nil {
			return nil, xerrors.Errorf("failed to create template: %w", err)
		}
		p.logger.Info(p.ctx, "created template", slog.F("template_id", templ.ID), slog.F("name", p.templateName))
	} else if err != nil {
		return nil, xerrors.Errorf("failed to get template: %w", err)
	}

	// Partition the number into a list decreasing by half each time
	evalParts := partitionEvaluations(int(p.numEvals))
	p.logger.Info(p.ctx, "partitioned evaluations", slog.F("num_evals", p.numEvals), slog.F("eval_parts", evalParts))

	// If tempVersion is not empty (i.e. we created it above), use it as the first version.
	partitions := make([]Partition, 0, len(evalParts))
	if tempVersion.ID != uuid.Nil && len(evalParts) > 0 {
		partitions = append(partitions, Partition{
			TemplateVersion:       tempVersion,
			ConcurrentEvaluations: evalParts[0],
		})
		evalParts = evalParts[1:]
	}

	for _, num := range evalParts {
		version, err := p.createTemplateVersion(templ.ID)
		if err != nil {
			return nil, xerrors.Errorf("failed to create template version: %w", err)
		}
		partitions = append(partitions, Partition{
			TemplateVersion:       version,
			ConcurrentEvaluations: num,
		})
		p.logger.Info(p.ctx, "created template version", slog.F("version_id", version.ID))
	}

	err = p.waitForTemplateVersionJobs(partitions)
	if err != nil {
		return nil, xerrors.Errorf("one or more template version jobs did not succeed: %w", err)
	}
	return partitions, nil
}
// createTemplateVersion generates the template tarball, uploads it, and creates
// a Terraform template version from the uploaded file under templateID.
//
// It returns ErrNoProvisionersMatched when the created version reports a
// matched-provisioner count of zero. On any error the returned TemplateVersion
// is the zero value.
func (p *partitioner) createTemplateVersion(templateID uuid.UUID) (codersdk.TemplateVersion, error) {
	var none codersdk.TemplateVersion

	archive, err := TemplateTarData()
	if err != nil {
		return none, xerrors.Errorf("failed to create template tarball: %w", err)
	}

	// Upload tarball
	upload, err := p.client.Upload(p.ctx, codersdk.ContentTypeTar, bytes.NewReader(archive))
	if err != nil {
		return none, xerrors.Errorf("failed to upload template tar: %w", err)
	}

	// Create template version
	created, err := p.client.CreateTemplateVersion(p.ctx, p.orgID, codersdk.CreateTemplateVersionRequest{
		TemplateID:    templateID,
		FileID:        upload.ID,
		Message:       "Initial version for scaletest dynamic parameters",
		StorageMethod: codersdk.ProvisionerStorageMethodFile,
		Provisioner:   codersdk.ProvisionerTypeTerraform,
	})
	if err != nil {
		return none, xerrors.Errorf("failed to create template version: %w", err)
	}

	// A nil MatchedProvisioners is not treated as "no provisioners".
	if matched := created.MatchedProvisioners; matched != nil && matched.Count == 0 {
		return none, ErrNoProvisionersMatched
	}
	return created, nil
}
// waitForTemplateVersionJobs polls the template versions of all partitions every
// two seconds until each provisioner job has succeeded.
//
// Versions whose job is pending or running are polled again on the next tick.
// Any other non-succeeded status aborts the wait with a
// ProvisionerJobUnexpectedStatusError; a failed fetch aborts it with a wrapped
// error. It returns nil once no versions remain outstanding.
func (p *partitioner) waitForTemplateVersionJobs(partitions []Partition) error {
	const pollInterval = 2 * time.Second

	// errDone is a private sentinel the callback returns to stop the ticker
	// once everything has succeeded; it is translated back to nil below.
	errDone := xerrors.New("done")

	outstanding := make(map[uuid.UUID]int, len(partitions))
	for idx := range partitions {
		outstanding[partitions[idx].TemplateVersion.ID] = idx
	}

	poll := func() error {
		for id := range outstanding {
			fetched, err := p.client.TemplateVersion(p.ctx, id)
			if err != nil {
				return xerrors.Errorf("failed to fetch template version %s: %w", id, err)
			}
			jobStatus := fetched.Job.Status
			p.logger.Info(p.ctx, "polled template version job", slog.F("version_id", id), slog.F("status", jobStatus))

			if jobStatus == codersdk.ProvisionerJobPending || jobStatus == codersdk.ProvisionerJobRunning {
				// Still in progress; check again on the next tick.
				continue
			}
			if jobStatus != codersdk.ProvisionerJobSucceeded {
				return ProvisionerJobUnexpectedStatusError{
					TemplateVersionID: id,
					Status:            jobStatus,
					JobError:          fetched.Job.Error,
				}
			}
			delete(outstanding, id)
		}
		if len(outstanding) > 0 {
			return nil
		}
		return errDone
	}

	waitErr := p.clock.TickerFunc(p.ctx, pollInterval, poll, "waitForTemplateVersionJobs").Wait()
	if xerrors.Is(waitErr, errDone) {
		return nil
	}
	return waitErr
}
// partitionEvaluations splits total into a list of parts where each part is
// half of what is still left, rounded up, until nothing is left. The parts sum
// to total. For example, 10 becomes [5, 3, 1, 1].
//
// It returns nil when total is zero or negative.
func partitionEvaluations(total int) []int {
	var parts []int
	for left := total; left > 0; {
		// Half of what is left, rounded up; always between 1 and left.
		half := left/2 + left%2
		parts = append(parts, half)
		left -= half
	}
	return parts
}
// ProvisionerJobUnexpectedStatusError is returned while waiting for template
// version jobs when a job is polled in a status other than succeeded, pending,
// or running.
type ProvisionerJobUnexpectedStatusError struct {
// TemplateVersionID identifies the template version whose job was polled.
TemplateVersionID uuid.UUID
// Status is the job status that was observed.
Status codersdk.ProvisionerJobStatus
// JobError is the error string reported on the job.
JobError string
}
// Error implements the error interface, reporting the template version ID, the
// observed job status, and the job's error string.
func (e ProvisionerJobUnexpectedStatusError) Error() string {
	const format = "template version %s job in unexpected status %q, error '%s'"
	return fmt.Sprintf(format, e.TemplateVersionID, e.Status, e.JobError)
}
@@ -0,0 +1,293 @@
package dynamicparameters
import (
"context"
"io"
"net/http"
"testing"
"time"
"github.com/google/uuid"
"github.com/stretchr/testify/require"
"cdr.dev/slog"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/testutil"
"github.com/coder/quartz"
)
// TestPartitionEvaluations checks partitionEvaluations against known splits and
// verifies that the returned parts always sum back to the input.
func TestPartitionEvaluations(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name     string
		input    int
		expected []int
	}
	cases := []testCase{
		{name: "10", input: 10, expected: []int{5, 3, 1, 1}},
		{name: "11", input: 11, expected: []int{6, 3, 1, 1}},
		{name: "12", input: 12, expected: []int{6, 3, 2, 1}},
		{name: "600", input: 600, expected: []int{300, 150, 75, 38, 19, 9, 5, 2, 1, 1}},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			got := partitionEvaluations(tc.input)
			require.Equal(t, tc.expected, got)

			// The parts must add up to the original number of evaluations.
			sum := 0
			for i := range got {
				sum += got[i]
			}
			require.Equal(t, tc.input, sum)
		})
	}
}
// TestSetupPartitions_TemplateExists runs the partitioner against a fake client
// whose TemplateByName lookup succeeds. It expects one template version (and
// one upload) per partition and no CreateTemplate call.
func TestSetupPartitions_TemplateExists(t *testing.T) {
t.Parallel()
logger := testutil.Logger(t).Leveled(slog.LevelDebug)
ctx := testutil.Context(t, testutil.WaitShort)
orgID := uuid.New()
// templateByNameError is left nil, so the fake reports the template as found.
fClient := &fakeClient{
t: t,
expectedTemplateName: "test-template",
expectedOrgID: orgID,
matchedProvisioners: 1,
templateVersionJobStatus: codersdk.ProvisionerJobSucceeded,
}
mClock := quartz.NewMock(t)
// Trap the TickerFunc call tagged "waitForTemplateVersionJobs" so the test can
// synchronize with it before advancing the mock clock.
trap := mClock.Trap().TickerFunc("waitForTemplateVersionJobs")
defer trap.Close()
uut := partitioner{
ctx: ctx,
client: fClient,
orgID: orgID,
templateName: "test-template",
numEvals: 600,
logger: logger,
clock: mClock,
}
var partitions []Partition
errCh := make(chan error, 1)
// run() waits on the ticker, so it executes in its own goroutine and reports
// its error through errCh.
go func() {
var err error
partitions, err = uut.run()
errCh <- err
}()
// Wait for run() to reach the trapped TickerFunc call, release it, and then
// advance the clock by the 2s poll interval so the polling callback runs.
trap.MustWait(ctx).MustRelease(ctx)
mClock.Advance(time.Second * 2).MustWait(ctx)
err := testutil.RequireReceive(ctx, t, errCh)
require.NoError(t, err)
// 600 evaluations should be partitioned into 10 parts: []int{300, 150, 75, 38, 19, 9, 5, 2, 1, 1}
// c.f. TestPartitionEvaluations. That's 10 template versions and associated uploads.
require.Equal(t, 10, len(partitions))
require.Equal(t, 10, fClient.templateVersionsCount)
require.Equal(t, 10, fClient.uploadsCount)
require.Equal(t, 1, fClient.templateByNameCount)
require.Equal(t, 0, fClient.createTemplateCount)
}
// TestSetupPartitions_TemplateDoesntExist runs the partitioner against a fake
// client whose TemplateByName lookup returns a 404 error. It expects the
// template to be created exactly once, and still only one template version (and
// one upload) per partition.
func TestSetupPartitions_TemplateDoesntExist(t *testing.T) {
t.Parallel()
logger := testutil.Logger(t).Leveled(slog.LevelDebug)
ctx := testutil.Context(t, testutil.WaitShort)
orgID := uuid.New()
// A 404 from TemplateByName is what sends the partitioner down the
// create-template path.
fClient := &fakeClient{
t: t,
expectedTemplateName: "test-template",
expectedOrgID: orgID,
templateByNameError: codersdk.NewTestError(http.StatusNotFound, "", ""),
matchedProvisioners: 1,
templateVersionJobStatus: codersdk.ProvisionerJobSucceeded,
}
mClock := quartz.NewMock(t)
// Trap the TickerFunc call tagged "waitForTemplateVersionJobs" so the test can
// synchronize with it before advancing the mock clock.
trap := mClock.Trap().TickerFunc("waitForTemplateVersionJobs")
defer trap.Close()
uut := partitioner{
ctx: ctx,
client: fClient,
orgID: orgID,
templateName: "test-template",
numEvals: 600,
logger: logger,
clock: mClock,
}
var partitions []Partition
errCh := make(chan error, 1)
// run() waits on the ticker, so it executes in its own goroutine and reports
// its error through errCh.
go func() {
var err error
partitions, err = uut.run()
errCh <- err
}()
// Wait for run() to reach the trapped TickerFunc call, release it, and then
// advance the clock by the 2s poll interval so the polling callback runs.
trap.MustWait(ctx).MustRelease(ctx)
mClock.Advance(time.Second * 2).MustWait(ctx)
err := testutil.RequireReceive(ctx, t, errCh)
require.NoError(t, err)
// 600 evaluations should be partitioned into 10 parts: []int{300, 150, 75, 38, 19, 9, 5, 2, 1, 1}
// c.f. TestPartitionEvaluations. That's 10 template versions and associated uploads.
// The version created before the template is reused for the first partition,
// so the total stays at 10.
require.Equal(t, 10, len(partitions))
require.Equal(t, 10, fClient.templateVersionsCount)
require.Equal(t, 10, fClient.uploadsCount)
require.Equal(t, 1, fClient.templateByNameCount)
require.Equal(t, 1, fClient.createTemplateCount)
}
// TestSetupPartitions_NoMatchedProvisioners runs the partitioner against a fake
// client that reports zero matched provisioners for created template versions.
// It expects run() to fail with ErrNoProvisionersMatched after the first
// template version is created.
func TestSetupPartitions_NoMatchedProvisioners(t *testing.T) {
t.Parallel()
logger := testutil.Logger(t).Leveled(slog.LevelDebug)
ctx := testutil.Context(t, testutil.WaitShort)
orgID := uuid.New()
// matchedProvisioners: 0 makes the first created version trigger the error.
fClient := &fakeClient{
t: t,
expectedTemplateName: "test-template",
expectedOrgID: orgID,
matchedProvisioners: 0,
templateVersionJobStatus: codersdk.ProvisionerJobSucceeded,
}
// No trap is set up here: run() is expected to return before it reaches the
// polling ticker.
mClock := quartz.NewMock(t)
uut := partitioner{
ctx: ctx,
client: fClient,
orgID: orgID,
templateName: "test-template",
numEvals: 600,
logger: logger,
clock: mClock,
}
errCh := make(chan error, 1)
go func() {
_, err := uut.run()
errCh <- err
}()
err := testutil.RequireReceive(ctx, t, errCh)
require.ErrorIs(t, err, ErrNoProvisionersMatched)
// Only the first version (and its upload) should have been attempted.
require.Equal(t, 1, fClient.templateVersionsCount)
require.Equal(t, 1, fClient.uploadsCount)
require.Equal(t, 1, fClient.templateByNameCount)
require.Equal(t, 0, fClient.createTemplateCount)
}
// TestSetupPartitions_JobFailed runs the partitioner against a fake client that
// reports every template version job as failed. It expects all 10 versions to
// be created and run() to then fail with a ProvisionerJobUnexpectedStatusError
// on the first poll.
func TestSetupPartitions_JobFailed(t *testing.T) {
t.Parallel()
logger := testutil.Logger(t).Leveled(slog.LevelDebug)
ctx := testutil.Context(t, testutil.WaitShort)
orgID := uuid.New()
fClient := &fakeClient{
t: t,
expectedTemplateName: "test-template",
expectedOrgID: orgID,
matchedProvisioners: 1,
templateVersionJobStatus: codersdk.ProvisionerJobFailed,
}
mClock := quartz.NewMock(t)
// Trap the TickerFunc call tagged "waitForTemplateVersionJobs" so the test can
// synchronize with it before advancing the mock clock.
trap := mClock.Trap().TickerFunc("waitForTemplateVersionJobs")
defer trap.Close()
uut := partitioner{
ctx: ctx,
client: fClient,
orgID: orgID,
templateName: "test-template",
numEvals: 600,
logger: logger,
clock: mClock,
}
errCh := make(chan error, 1)
go func() {
_, err := uut.run()
errCh <- err
}()
// Wait for run() to reach the trapped TickerFunc call, release it, and then
// advance the clock by the 2s poll interval so the polling callback runs.
trap.MustWait(ctx).MustRelease(ctx)
mClock.Advance(time.Second * 2).MustWait(ctx)
err := testutil.RequireReceive(ctx, t, errCh)
require.ErrorAs(t, err, &ProvisionerJobUnexpectedStatusError{})
require.Equal(t, 10, fClient.templateVersionsCount)
require.Equal(t, 10, fClient.uploadsCount)
require.Equal(t, 1, fClient.templateByNameCount)
require.Equal(t, 0, fClient.createTemplateCount)
}
// fakeClient is a test double for SDKForDynamicParametersSetup. The first group
// of fields configures its responses and expectations; the *Count fields record
// how many times each method was called.
type fakeClient struct {
t testing.TB
// expectedTemplateName and expectedOrgID are checked against the arguments
// of TemplateByName and CreateTemplate.
expectedTemplateName string
expectedOrgID uuid.UUID
// templateByNameError, when non-nil, is returned from TemplateByName.
templateByNameError error
// matchedProvisioners is the MatchedProvisioners count reported on returned
// template versions.
matchedProvisioners int
// templateVersionJobStatus is the job status reported by TemplateVersion.
templateVersionJobStatus codersdk.ProvisionerJobStatus
// Call counters.
createTemplateCount int
templateVersionsCount int
uploadsCount int
templateByNameCount int
}
// TemplateByName records the call, checks the arguments against the expected
// organization ID and template name, and returns either the configured
// templateByNameError or a template with a fresh ID.
//
// Mismatches are reported with Errorf rather than require: this method is
// called from the goroutine running partitioner.run, and FailNow (which require
// uses) must only be called from the test's own goroutine.
func (f *fakeClient) TemplateByName(ctx context.Context, orgID uuid.UUID, templateName string) (codersdk.Template, error) {
	f.templateByNameCount++
	if orgID != f.expectedOrgID {
		f.t.Errorf("TemplateByName: unexpected organization ID: want %s, got %s", f.expectedOrgID, orgID)
	}
	if templateName != f.expectedTemplateName {
		f.t.Errorf("TemplateByName: unexpected template name: want %q, got %q", f.expectedTemplateName, templateName)
	}

	if f.templateByNameError != nil {
		return codersdk.Template{}, f.templateByNameError
	}
	return codersdk.Template{
		ID:   uuid.New(),
		Name: f.expectedTemplateName,
	}, nil
}
// CreateTemplate records the call, checks the organization ID and requested
// template name against the expected values, and returns a template with a
// fresh ID.
//
// Mismatches are reported with Errorf rather than require: this method is
// called from the goroutine running partitioner.run, and FailNow (which require
// uses) must only be called from the test's own goroutine.
func (f *fakeClient) CreateTemplate(ctx context.Context, orgID uuid.UUID, createReq codersdk.CreateTemplateRequest) (codersdk.Template, error) {
	f.createTemplateCount++
	if orgID != f.expectedOrgID {
		f.t.Errorf("CreateTemplate: unexpected organization ID: want %s, got %s", f.expectedOrgID, orgID)
	}
	if createReq.Name != f.expectedTemplateName {
		f.t.Errorf("CreateTemplate: unexpected template name: want %q, got %q", f.expectedTemplateName, createReq.Name)
	}

	return codersdk.Template{
		ID:   uuid.New(),
		Name: f.expectedTemplateName,
	}, nil
}
// CreateTemplateVersion records the call and returns a template version with a
// fresh ID, the expected template name, and the configured matched-provisioner
// count. The request itself is not inspected.
func (f *fakeClient) CreateTemplateVersion(ctx context.Context, orgID uuid.UUID, createReq codersdk.CreateTemplateVersionRequest) (codersdk.TemplateVersion, error) {
	f.templateVersionsCount++

	var version codersdk.TemplateVersion
	version.ID = uuid.New()
	version.Name = f.expectedTemplateName
	version.MatchedProvisioners = &codersdk.MatchedProvisioners{Count: f.matchedProvisioners}
	return version, nil
}
// Upload records the call and returns a response with a fresh ID. The reader
// is not consumed.
func (f *fakeClient) Upload(ctx context.Context, contentType string, reader io.Reader) (codersdk.UploadResponse, error) {
	f.uploadsCount++

	var resp codersdk.UploadResponse
	resp.ID = uuid.New()
	return resp, nil
}
// TemplateVersion returns a template version carrying the requested ID, the
// configured job status, and the configured matched-provisioner count.
func (f *fakeClient) TemplateVersion(ctx context.Context, versionID uuid.UUID) (codersdk.TemplateVersion, error) {
	var version codersdk.TemplateVersion
	version.ID = versionID
	version.Job = codersdk.ProvisionerJob{Status: f.templateVersionJobStatus}
	version.MatchedProvisioners = &codersdk.MatchedProvisioners{Count: f.matchedProvisioners}
	return version, nil
}