coder/coderd/prometheusmetrics/aggregator_test.go

package prometheusmetrics_test

import (
	"context"
	"fmt"
	"sort"
	"strings"
	"sync/atomic"
	"testing"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"cdr.dev/slog/v3/sloggers/slogtest"
	agentproto "github.com/coder/coder/v2/agent/proto"
	"github.com/coder/coder/v2/coderd/agentmetrics"
	"github.com/coder/coder/v2/coderd/prometheusmetrics"
	"github.com/coder/coder/v2/cryptorand"
	"github.com/coder/coder/v2/testutil"
)

// Fixed identity values shared by the tests in this file. They populate
// testLabels and the label sets that tests expect on collected metrics.
const (
	testWorkspaceName = "yogi-workspace"
	testUsername      = "yogi-bear"
	testAgentName     = "main-agent"
	testTemplateName  = "main-template"
)

// testLabels is the agent label set handed to the aggregator's Update method
// by tests and benchmarks that do not need per-case identities. Every field
// is filled in from the test* constants.
var testLabels = prometheusmetrics.AgentMetricLabels{
	Username:      testUsername,
	WorkspaceName: testWorkspaceName,
	AgentName:     testAgentName,
	TemplateName:  testTemplateName,
}

// TestUpdateMetrics_MetricsDoNotExpire feeds three successive batches of agent
// metrics into an aggregator built with a one-hour duration (so nothing expires
// while the test runs) and polls Collect until the reported series match the
// expected set: one entry per metric name / label combination, carrying the
// value from the most recent batch that reported it.
func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
	t.Parallel()

	// given
	reg := prometheus.NewRegistry()
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
	// time.Hour, so metrics won't expire
	metricsAggregator, err := prometheusmetrics.NewMetricsAggregator(logger, reg, time.Hour, nil)
	require.NoError(t, err)

	ctx, cancel := context.WithCancel(context.Background())
	t.Cleanup(cancel)

	stop := metricsAggregator.Run(ctx)
	t.Cleanup(stop)

	// First batch: seeds the counters and one gauge.
	given1 := []*agentproto.Stats_Metric{
		{Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
		{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 2},
		// Same metric name with an extra label, to check labelled series are updated correctly.
		{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 27, Labels: []*agentproto.Stats_Metric_Label{
			{Name: "lizz", Value: "rizz"},
		}},
		{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 3},
	}

	// Second batch: overwrites values from the first batch and introduces new
	// series, including label values containing ',', '=' and '\'.
	given2 := []*agentproto.Stats_Metric{
		{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4},
		// Same metric name with an extra label, to check labelled series are updated correctly.
		{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: -9, Labels: []*agentproto.Stats_Metric_Label{
			{Name: "lizz", Value: "rizz"},
		}},
		{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5},
		{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{
			{Name: "foobar", Value: "Foobaz"},
			{Name: "hello", Value: "world"},
		}},
		{Name: "d_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 6},
		{Name: "e_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 15, Labels: []*agentproto.Stats_Metric_Label{
			{Name: "foobar", Value: "Foo,ba=z"},
			{Name: "halo", Value: "wor\\,d=1,e=\\,2"},
			{Name: "hello", Value: "wo,,r=d"},
		}},
		{Name: "f_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 6, Labels: []*agentproto.Stats_Metric_Label{
			{Name: "empty", Value: ""},
			{Name: "foobar", Value: "foobaz"},
		}},
	}

	// Third batch: a new label set for e_gauge_four and a fresh value for f_gauge_four.
	given3 := []*agentproto.Stats_Metric{
		{Name: "e_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 17, Labels: []*agentproto.Stats_Metric_Label{
			{Name: "cat", Value: "do,=g"},
			{Name: "hello", Value: "wo,,rld"},
		}},
		{Name: "f_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 8, Labels: []*agentproto.Stats_Metric_Label{
			{Name: "foobar", Value: "foobaz"},
		}},
	}

	// Labels expected on every series produced from testLabels.
	commonLabels := []*agentproto.Stats_Metric_Label{
		{Name: agentmetrics.LabelAgentName, Value: testAgentName},
		{Name: agentmetrics.LabelUsername, Value: testUsername},
		{Name: agentmetrics.LabelWorkspaceName, Value: testWorkspaceName},
		{Name: agentmetrics.LabelTemplateName, Value: testTemplateName},
	}
	// Entries sharing a name are listed in the order verifyCollectedMetrics
	// is expected to see them after sorting the collected metrics.
	expected := []*agentproto.Stats_Metric{
		{Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1, Labels: commonLabels},
		{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: -9, Labels: []*agentproto.Stats_Metric_Label{
			{Name: agentmetrics.LabelAgentName, Value: testAgentName},
			{Name: "lizz", Value: "rizz"},
			{Name: agentmetrics.LabelUsername, Value: testUsername},
			{Name: agentmetrics.LabelWorkspaceName, Value: testWorkspaceName},
			{Name: agentmetrics.LabelTemplateName, Value: testTemplateName},
		}},
		{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4, Labels: commonLabels},
		{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{
			{Name: agentmetrics.LabelAgentName, Value: testAgentName},
			{Name: "foobar", Value: "Foobaz"},
			{Name: "hello", Value: "world"},
			{Name: agentmetrics.LabelUsername, Value: testUsername},
			{Name: agentmetrics.LabelWorkspaceName, Value: testWorkspaceName},
			{Name: agentmetrics.LabelTemplateName, Value: testTemplateName},
		}},
		{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5, Labels: commonLabels},
		{Name: "d_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 6, Labels: commonLabels},
		{Name: "e_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 17, Labels: []*agentproto.Stats_Metric_Label{
			{Name: agentmetrics.LabelAgentName, Value: testAgentName},
			{Name: "cat", Value: "do,=g"},
			{Name: "hello", Value: "wo,,rld"},
			{Name: agentmetrics.LabelUsername, Value: testUsername},
			{Name: agentmetrics.LabelWorkspaceName, Value: testWorkspaceName},
			{Name: agentmetrics.LabelTemplateName, Value: testTemplateName},
		}},
		{Name: "e_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 15, Labels: []*agentproto.Stats_Metric_Label{
			{Name: agentmetrics.LabelAgentName, Value: testAgentName},
			{Name: "foobar", Value: "Foo,ba=z"},
			{Name: "halo", Value: "wor\\,d=1,e=\\,2"},
			{Name: "hello", Value: "wo,,r=d"},
			{Name: agentmetrics.LabelUsername, Value: testUsername},
			{Name: agentmetrics.LabelWorkspaceName, Value: testWorkspaceName},
			{Name: agentmetrics.LabelTemplateName, Value: testTemplateName},
		}},
		{Name: "f_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 8, Labels: []*agentproto.Stats_Metric_Label{
			{Name: agentmetrics.LabelAgentName, Value: testAgentName},
			{Name: "foobar", Value: "foobaz"},
			{Name: agentmetrics.LabelUsername, Value: testUsername},
			{Name: agentmetrics.LabelWorkspaceName, Value: testWorkspaceName},
			{Name: agentmetrics.LabelTemplateName, Value: testTemplateName},
		}},
	}

	// when
	for _, batch := range [][]*agentproto.Stats_Metric{given1, given2, given3} {
		metricsAggregator.Update(ctx, testLabels, batch)
	}

	// then
	require.Eventually(t, func() bool {
		metricsCh := make(chan prometheus.Metric)
		collected := make(chan []prometheus.Metric, 1)

		// Drain metricsCh on a separate goroutine because Collect sends on
		// an unbuffered channel; the gathered slice is handed back once the
		// channel has been closed.
		go func() {
			var got []prometheus.Metric
			for m := range metricsCh {
				got = append(got, m)
			}
			collected <- got
		}()

		metricsAggregator.Collect(metricsCh)
		close(metricsCh)
		return verifyCollectedMetrics(t, expected, <-collected)
	}, testutil.WaitMedium, testutil.IntervalSlow)
}

// verifyCollectedMetrics reports whether the metrics collected from the
// aggregator match expected.
//
// It returns false, without failing the test, when the number of metrics or a
// metric value differs, so callers can poll it from require.Eventually. Name,
// label and metric-type mismatches are recorded through assert and therefore
// do fail the test.
//
// Both slices are sorted in place: expected by metric name (stably, so entries
// sharing a name keep the order in which the caller listed them) and actual by
// the key produced by prometheusMetricToString. The two are then compared
// index by index.
func verifyCollectedMetrics(t *testing.T, expected []*agentproto.Stats_Metric, actual []prometheus.Metric) bool {
	if len(expected) != len(actual) {
		t.Logf("expected %d metrics, got %d", len(expected), len(actual))
		return false
	}

	// ensure stable iteration order
	sort.SliceStable(expected, func(i, j int) bool {
		return expected[i].Name < expected[j].Name
	})

	sort.Slice(actual, func(i, j int) bool {
		m1 := prometheusMetricToString(t, actual[i])
		m2 := prometheusMetricToString(t, actual[j])
		return m1 < m2
	})

	for i, e := range expected {
		desc := actual[i].Desc()
		assert.Contains(t, desc.String(), e.Name)

		var d dto.Metric
		err := actual[i].Write(&d)
		assert.NoError(t, err)

		switch e.Type {
		case agentproto.Stats_Metric_COUNTER:
			if got := d.Counter.GetValue(); e.Value != got {
				t.Logf("metric %q: expected counter value %v, got %v", e.Name, e.Value, got)
				return false
			}
		case agentproto.Stats_Metric_GAUGE:
			if got := d.Gauge.GetValue(); e.Value != got {
				t.Logf("metric %q: expected gauge value %v, got %v", e.Name, e.Value, got)
				return false
			}
		default:
			// Failf takes the failure message first and the format string
			// second; %v prints the enum itself rather than converting its
			// numeric value to a rune.
			assert.Failf(t, "unsupported metric type", "metric %q has type %v", e.Name, e.Type)
		}

		// Sort a copy so the caller's label slices (which may be shared
		// between expected entries) are left untouched.
		expectedLabels := make([]*agentproto.Stats_Metric_Label, len(e.Labels))
		copy(expectedLabels, e.Labels)
		sort.Slice(expectedLabels, func(a, b int) bool {
			return expectedLabels[a].Name < expectedLabels[b].Name
		})

		// Each slice gets its own comparator: a shared closure indexing
		// expectedLabels would not order dtoLabels by its own names and would
		// panic if dtoLabels were the longer slice.
		dtoLabels := asMetricAgentLabels(d.GetLabel())
		sort.Slice(dtoLabels, func(a, b int) bool {
			return dtoLabels[a].Name < dtoLabels[b].Name
		})
		assert.Equal(t, expectedLabels, dtoLabels, d.String())
	}
	return true
}

// prometheusMetricToString builds the string used to order collected metrics:
// the metric's descriptor string, a '|' delimiter, and then every label with a
// non-empty value written as name=encodedValue in ascending label-name order.
// A failure to write the metric into its dto form is reported through assert.
func prometheusMetricToString(t *testing.T, m prometheus.Metric) string {
	var key strings.Builder
	_, _ = key.WriteString(m.Desc().String())
	_ = key.WriteByte('|')

	var pb dto.Metric
	assert.NoError(t, m.Write(&pb))

	labels := asMetricAgentLabels(pb.GetLabel())
	sort.Slice(labels, func(a, b int) bool {
		return labels[a].Name < labels[b].Name
	})

	for _, label := range labels {
		if label.Value != "" {
			_, _ = key.WriteString(label.Name)
			_ = key.WriteByte('=')
			_, _ = key.WriteString(prometheusmetrics.MetricLabelValueEncoder.Replace(label.Value))
		}
	}

	// NOTE(review): no ',' is written between the pairs above, so this trim
	// only has an effect when the assembled key itself ends in commas.
	return strings.TrimRight(key.String(), ",")
}

// asMetricAgentLabels converts Prometheus dto label pairs into agent metric
// labels, preserving their order and dropping any pair whose value is empty.
// The result is always non-nil.
func asMetricAgentLabels(dtoLabels []*dto.LabelPair) []*agentproto.Stats_Metric_Label {
	converted := make([]*agentproto.Stats_Metric_Label, 0, len(dtoLabels))
	for _, pair := range dtoLabels {
		value := pair.GetValue()
		if value != "" {
			converted = append(converted, &agentproto.Stats_Metric_Label{
				Name:  pair.GetName(),
				Value: value,
			})
		}
	}
	return converted
}

// TestUpdateMetrics_MetricsExpire builds an aggregator with a one-millisecond
// duration, submits a single counter, and then polls Collect until it reports
// no metrics at all.
func TestUpdateMetrics_MetricsExpire(t *testing.T) {
	t.Parallel()

	// given
	reg := prometheus.NewRegistry()
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
	metricsAggregator, err := prometheusmetrics.NewMetricsAggregator(logger, reg, time.Millisecond, agentmetrics.LabelAll)
	require.NoError(t, err)

	ctx, cancel := context.WithCancel(context.Background())
	t.Cleanup(cancel)

	stop := metricsAggregator.Run(ctx)
	t.Cleanup(stop)

	given := []*agentproto.Stats_Metric{
		{Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
	}

	// when
	metricsAggregator.Update(ctx, testLabels, given)

	// Ensure that metric is expired
	time.Sleep(10 * time.Millisecond)

	// then
	require.Eventually(t, func() bool {
		metricsCh := make(chan prometheus.Metric)
		collected := make(chan []prometheus.Metric, 1)

		// Drain metricsCh concurrently; Collect sends on an unbuffered channel.
		go func() {
			var got []prometheus.Metric
			for m := range metricsCh {
				got = append(got, m)
			}
			collected <- got
		}()

		metricsAggregator.Collect(metricsCh)
		close(metricsCh)
		actual := <-collected
		return len(actual) == 0
	}, testutil.WaitShort, testutil.IntervalFast)
}

// TestLabelsAggregation is a table-driven test of the aggregator's
// label-aggregation setting. Each case constructs an aggregator with a given
// aggregateOn list, submits one or more batches of metrics (each with its own
// agent labels), and polls Collect until the reported series match the
// case's expected metrics.
func TestLabelsAggregation(t *testing.T) {
	t.Parallel()

	// statCollection is one Update call: the agent labels plus the metrics
	// reported under them.
	type statCollection struct {
		labels  prometheusmetrics.AgentMetricLabels
		metrics []*agentproto.Stats_Metric
	}

	// Labels expected on a series produced from testLabels when all labels are kept.
	commonLabels := []*agentproto.Stats_Metric_Label{
		{Name: agentmetrics.LabelUsername, Value: testUsername},
		{Name: agentmetrics.LabelAgentName, Value: testAgentName},
		{Name: agentmetrics.LabelWorkspaceName, Value: testWorkspaceName},
		{Name: agentmetrics.LabelTemplateName, Value: testTemplateName},
	}

	cases := []struct {
		name        string
		given       []statCollection
		expected    []*agentproto.Stats_Metric
		aggregateOn []string
	}{
		{
			name:        "label aggregations not specified, keep all (high cardinality, default behavior)",
			aggregateOn: agentmetrics.LabelAll,
			given: []statCollection{
				{
					labels: testLabels,
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
					},
				},
				{
					labels: testLabels,
					metrics: []*agentproto.Stats_Metric{
						{Name: "active_conns", Type: agentproto.Stats_Metric_GAUGE, Value: 4},
					},
				},
			},
			expected: []*agentproto.Stats_Metric{
				{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1, Labels: commonLabels},
				{Name: "active_conns", Type: agentproto.Stats_Metric_GAUGE, Value: 4, Labels: commonLabels},
			},
		},
		{
			// Two users share one agent and the deployment aggregates on the "agent_name" label.
			name:        "single label aggregation, aggregating to single metric",
			aggregateOn: []string{agentmetrics.LabelAgentName},
			given: []statCollection{
				{
					labels: prometheusmetrics.AgentMetricLabels{
						Username:  "user1",
						AgentName: "agent1",
					},
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
					},
				},
				{
					labels: prometheusmetrics.AgentMetricLabels{
						Username:  "user2",
						AgentName: "agent1",
					},
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 7},
					},
				},
			},
			expected: []*agentproto.Stats_Metric{
				// Only one agent_name value was observed, so everything collapses into a single series.
				{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 8, Labels: []*agentproto.Stats_Metric_Label{
					{Name: agentmetrics.LabelAgentName, Value: "agent1"},
				}},
			},
		},
		{
			// Same as the previous case, but aggregating on two invariant labels.
			name:        "multiple label aggregation, aggregating to single metric",
			aggregateOn: []string{agentmetrics.LabelAgentName, agentmetrics.LabelTemplateName},
			given: []statCollection{
				{
					labels: prometheusmetrics.AgentMetricLabels{
						Username:     "user1",
						AgentName:    "agent1",
						TemplateName: "template1",
					},
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
					},
				},
				{
					labels: prometheusmetrics.AgentMetricLabels{
						Username:     "user2",
						AgentName:    "agent1",
						TemplateName: "template1",
					},
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 7},
					},
				},
			},
			expected: []*agentproto.Stats_Metric{
				// Only one agent_name & template_name tuple was observed, so everything collapses into a single series.
				{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 8, Labels: []*agentproto.Stats_Metric_Label{
					{Name: agentmetrics.LabelAgentName, Value: "agent1"},
					{Name: agentmetrics.LabelTemplateName, Value: "template1"},
				}},
			},
		},
		{
			// Aggregating on a label whose value is unique to each submitted batch.
			name:        "single label aggregation, aggregating to multiple metrics",
			aggregateOn: []string{agentmetrics.LabelUsername},
			given: []statCollection{
				{
					labels: prometheusmetrics.AgentMetricLabels{
						Username:     "user1",
						AgentName:    "agent1",
						TemplateName: "template1",
					},
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
					},
				},
				{
					labels: prometheusmetrics.AgentMetricLabels{
						Username:     "user2",
						AgentName:    "agent1",
						TemplateName: "template1",
					},
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 7},
					},
				},
			},
			expected: []*agentproto.Stats_Metric{
				// Two distinct username values were observed, so each gets its own series.
				{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1, Labels: []*agentproto.Stats_Metric_Label{
					{Name: agentmetrics.LabelUsername, Value: "user1"},
				}},
				{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 7, Labels: []*agentproto.Stats_Metric_Label{
					{Name: agentmetrics.LabelUsername, Value: "user2"},
				}},
			},
		},
		{
			// Aggregating on a per-batch unique label together with two invariant labels.
			name:        "multiple label aggregation, aggregating to multiple metrics",
			aggregateOn: []string{agentmetrics.LabelUsername, agentmetrics.LabelAgentName, agentmetrics.LabelTemplateName},
			given: []statCollection{
				{
					labels: prometheusmetrics.AgentMetricLabels{
						Username:     "user1",
						AgentName:    "agent1",
						TemplateName: "template1",
					},
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
					},
				},
				{
					labels: prometheusmetrics.AgentMetricLabels{
						Username:     "user2",
						AgentName:    "agent1",
						TemplateName: "template1",
					},
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 7},
					},
				},
			},
			expected: []*agentproto.Stats_Metric{
				// Two distinct username values were observed, so each gets its own series.
				{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1, Labels: []*agentproto.Stats_Metric_Label{
					{Name: agentmetrics.LabelUsername, Value: "user1"},
					{Name: agentmetrics.LabelAgentName, Value: "agent1"},
					{Name: agentmetrics.LabelTemplateName, Value: "template1"},
				}},
				{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 7, Labels: []*agentproto.Stats_Metric_Label{
					{Name: agentmetrics.LabelUsername, Value: "user2"},
					{Name: agentmetrics.LabelAgentName, Value: "agent1"},
					{Name: agentmetrics.LabelTemplateName, Value: "template1"},
				}},
			},
		},
		{
			name:        "extra labels are retained, even with label aggregations",
			aggregateOn: []string{agentmetrics.LabelUsername},
			given: []statCollection{
				{
					labels: testLabels,
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
					},
				},
				{
					labels: testLabels,
					metrics: []*agentproto.Stats_Metric{
						{Name: "extra_label", Type: agentproto.Stats_Metric_COUNTER, Value: 27, Labels: []*agentproto.Stats_Metric_Label{
							{Name: "lizz", Value: "rizz"},
						}},
					},
				},
			},
			expected: []*agentproto.Stats_Metric{
				{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1, Labels: []*agentproto.Stats_Metric_Label{
					{Name: agentmetrics.LabelUsername, Value: testUsername},
				}},
				{Name: "extra_label", Type: agentproto.Stats_Metric_COUNTER, Value: 27, Labels: []*agentproto.Stats_Metric_Label{
					{Name: "lizz", Value: "rizz"},
					{Name: agentmetrics.LabelUsername, Value: testUsername},
				}},
			},
		},
		{
			// Counters and gauges alike are expected to have their values summed.
			name:        "counters & gauges behave identically",
			aggregateOn: []string{agentmetrics.LabelTemplateName},
			given: []statCollection{
				{
					labels: prometheusmetrics.AgentMetricLabels{
						Username:     "username1",
						TemplateName: "template1",
					},
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
						{Name: "active_conns", Type: agentproto.Stats_Metric_GAUGE, Value: 3},
					},
				},
				{
					labels: prometheusmetrics.AgentMetricLabels{
						Username:     "username2",
						TemplateName: "template1",
					},
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 2},
						{Name: "active_conns", Type: agentproto.Stats_Metric_GAUGE, Value: 4},
					},
				},
			},
			expected: []*agentproto.Stats_Metric{
				{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 3, Labels: []*agentproto.Stats_Metric_Label{
					{Name: agentmetrics.LabelTemplateName, Value: "template1"},
				}},
				{Name: "active_conns", Type: agentproto.Stats_Metric_GAUGE, Value: 7, Labels: []*agentproto.Stats_Metric_Label{
					{Name: agentmetrics.LabelTemplateName, Value: "template1"},
				}},
			},
		},
		{
			// Validation fails and an invalid label is selected for aggregation.
			name:        "invalid label aggregation",
			aggregateOn: []string{"nonsense"},
			given: []statCollection{
				{
					labels: testLabels,
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
					},
				},
			},
			// No metrics are expected back.
			expected: []*agentproto.Stats_Metric{},
		},
		{
			// Validation fails and an empty list is given for aggregation.
			name:        "empty label aggregation list",
			aggregateOn: []string{},
			given: []statCollection{
				{
					labels: testLabels,
					metrics: []*agentproto.Stats_Metric{
						{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
					},
				},
			},
			// The default aggregation is expected to apply.
			expected: []*agentproto.Stats_Metric{
				{Name: "user_counter", Type: agentproto.Stats_Metric_COUNTER, Value: 1, Labels: commonLabels},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			// given
			reg := prometheus.NewRegistry()
			logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
			// time.Hour, so metrics won't expire
			metricsAggregator, err := prometheusmetrics.NewMetricsAggregator(logger, reg, time.Hour, tc.aggregateOn)
			require.NoError(t, err)

			ctx, cancel := context.WithCancel(context.Background())
			t.Cleanup(cancel)

			stop := metricsAggregator.Run(ctx)
			t.Cleanup(stop)

			// when
			for _, batch := range tc.given {
				metricsAggregator.Update(ctx, batch.labels, batch.metrics)
			}

			// then
			require.Eventually(t, func() bool {
				metricsCh := make(chan prometheus.Metric)
				collected := make(chan []prometheus.Metric, 1)

				// Drain metricsCh concurrently; Collect sends on an unbuffered channel.
				go func() {
					var got []prometheus.Metric
					for m := range metricsCh {
						got = append(got, m)
					}
					collected <- got
				}()

				metricsAggregator.Collect(metricsCh)
				close(metricsCh)
				return verifyCollectedMetrics(t, tc.expected, <-collected)
			}, testutil.WaitMedium, testutil.IntervalSlow)
		})
	}
}

// Benchmark_MetricsAggregator_Run runs benchmarkRunner with the full
// agentmetrics.LabelAll list passed as the labels to aggregate by.
func Benchmark_MetricsAggregator_Run(b *testing.B) {
	benchmarkRunner(b, agentmetrics.LabelAll)
}

// Benchmark_MetricsAggregator_RunWithAggregations runs benchmarkRunner once
// per prefix of agentmetrics.LabelAll, from the first label alone up to the
// whole list, as sub-benchmarks named "<count> labels".
func Benchmark_MetricsAggregator_RunWithAggregations(b *testing.B) {
	for idx := range agentmetrics.LabelAll {
		count := idx + 1
		b.Run(fmt.Sprintf("%d labels", count), func(b *testing.B) {
			benchmarkRunner(b, agentmetrics.LabelAll[:count])
		})
	}
}

// benchmarkRunner is the shared body of the aggregator benchmarks.
//
// It starts an aggregator configured with aggregateByLabels and a background
// goroutine that calls Collect in a loop until the context is cancelled. Each
// of the b.N iterations generates a batch of random metrics with the timer
// stopped, then times one Update call plus receiving one collected metric per
// generated metric from the Collect channel.
func benchmarkRunner(b *testing.B, aggregateByLabels []string) {
	b.ReportAllocs()

	// Number of metrics to generate and send in each iteration.
	// Hard-coded to 1024 to avoid overflowing the queue in the metrics aggregator.
	const numMetrics = 1024

	// given
	reg := prometheus.NewRegistry()
	logger := slogtest.Make(b, &slogtest.Options{IgnoreErrors: true})
	metricsAggregator := must(prometheusmetrics.NewMetricsAggregator(logger, reg, time.Hour, aggregateByLabels))

	ctx, cancel := context.WithCancel(context.Background())
	b.Cleanup(cancel)

	stop := metricsAggregator.Run(ctx)
	b.Cleanup(stop)

	// Keep collecting into ch for as long as the benchmark's context is live.
	ch := make(chan prometheus.Metric)
	go func() {
		for ctx.Err() == nil {
			metricsAggregator.Collect(ch)
		}
	}()

	for iter := 0; iter < b.N; iter++ {
		// Fixture generation is excluded from the measurement.
		b.StopTimer()
		b.Logf("N=%d generating %d metrics", b.N, numMetrics)
		metrics := make([]*agentproto.Stats_Metric, numMetrics)
		for idx := range metrics {
			metrics[idx] = genAgentMetric(b)
		}

		b.Logf("N=%d sending %d metrics", b.N, numMetrics)
		var nGot atomic.Int64
		b.StartTimer()
		metricsAggregator.Update(ctx, testLabels, metrics)
		for received := 0; received < numMetrics; received++ {
			select {
			case <-ctx.Done():
				b.FailNow()
			case <-ch:
				nGot.Add(1)
			}
		}
		b.StopTimer()
		b.Logf("N=%d got %d metrics", b.N, nGot.Load())
	}
}

// genAgentMetric returns a randomly generated agent metric with no labels.
// The type is a counter or a gauge depending on a random draw, the name is
// "metric_" + 80 random alphabetic characters + "_gen", and the value is a
// random fraction multiplied by a random integer below 1000. It panics (via
// must) if the random source returns an error.
func genAgentMetric(t testing.TB) *agentproto.Stats_Metric {
	t.Helper()

	metric := &agentproto.Stats_Metric{
		Type:   agentproto.Stats_Metric_GAUGE,
		Labels: []*agentproto.Stats_Metric_Label{},
	}
	if coin := must(cryptorand.Float64()); coin >= 0.5 {
		metric.Type = agentproto.Stats_Metric_COUNTER
	}

	// Fixed prefix and suffix keep the name from starting or ending with an
	// underscore, which Prometheus does not allow.
	randomPart := must(cryptorand.StringCharset(cryptorand.Alpha, 80))
	metric.Name = "metric_" + randomPart + "_gen"

	// Random metric value between 0 and 1000.
	fraction := must(cryptorand.Float64())
	scale := must(cryptorand.Intn(1000))
	metric.Value = fraction * float64(scale)

	return metric
}

// must unwraps a (value, error) pair: it returns t when err is nil and panics
// with err otherwise. Intended for test and benchmark setup only.
func must[T any](t T, err error) T {
	if err == nil {
		return t
	}
	panic(err)
}