mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
feat: add Prometheus metrics for license warnings and errors (#21749)
Fixes: coder/internal#767

Adds two new Prometheus metrics for license health monitoring:

- `coderd_license_warnings` - count of active license warnings
- `coderd_license_errors` - count of active license errors

Metrics endpoint after startup of a deployment with a license enabled:

```
...
# HELP coderd_license_errors The number of active license errors.
# TYPE coderd_license_errors gauge
coderd_license_errors 0
...
# HELP coderd_license_warnings The number of active license warnings.
# TYPE coderd_license_warnings gauge
coderd_license_warnings 0
...
```
This commit is contained in:
@@ -162,6 +162,12 @@ func (l *Set) Errors() []string {
|
||||
return slices.Clone(l.entitlements.Errors)
|
||||
}
|
||||
|
||||
func (l *Set) Warnings() []string {
|
||||
l.entitlementsMu.RLock()
|
||||
defer l.entitlementsMu.RUnlock()
|
||||
return slices.Clone(l.entitlements.Warnings)
|
||||
}
|
||||
|
||||
func (l *Set) HasLicense() bool {
|
||||
l.entitlementsMu.RLock()
|
||||
defer l.entitlementsMu.RUnlock()
|
||||
|
||||
@@ -147,8 +147,10 @@ deployment. They will always be available from the agent.
|
||||
| `coderd_insights_parameters` | gauge | The parameter usage per template. | `parameter_name` `parameter_type` `parameter_value` `template_name` |
|
||||
| `coderd_insights_templates_active_users` | gauge | The number of active users of the template. | `template_name` |
|
||||
| `coderd_license_active_users` | gauge | The number of active users. | |
|
||||
| `coderd_license_errors` | gauge | The number of active license errors. | |
|
||||
| `coderd_license_limit_users` | gauge | The user seats limit based on the active Coder license. | |
|
||||
| `coderd_license_user_limit_enabled` | gauge | Returns 1 if the current license enforces the user limit. | |
|
||||
| `coderd_license_warnings` | gauge | The number of active license warnings. | |
|
||||
| `coderd_metrics_collector_agents_execution_seconds` | histogram | Histogram for duration of agents metrics collection in seconds. | |
|
||||
| `coderd_oauth2_external_requests_rate_limit` | gauge | The total number of allowed requests per interval. | `name` `resource` |
|
||||
| `coderd_oauth2_external_requests_rate_limit_next_reset_unix` | gauge | Unix timestamp of the next interval | `name` `resource` |
|
||||
|
||||
@@ -11,6 +11,10 @@ var (
|
||||
activeUsersDesc = prometheus.NewDesc("coderd_license_active_users", "The number of active users.", nil, nil)
|
||||
limitUsersDesc = prometheus.NewDesc("coderd_license_limit_users", "The user seats limit based on the active Coder license.", nil, nil)
|
||||
userLimitEnabledDesc = prometheus.NewDesc("coderd_license_user_limit_enabled", "Returns 1 if the current license enforces the user limit.", nil, nil)
|
||||
|
||||
// Metrics for license warnings and errors.
|
||||
licenseWarningsDesc = prometheus.NewDesc("coderd_license_warnings", "The number of active license warnings.", nil, nil)
|
||||
licenseErrorsDesc = prometheus.NewDesc("coderd_license_errors", "The number of active license errors.", nil, nil)
|
||||
)
|
||||
|
||||
type MetricsCollector struct {
|
||||
@@ -23,9 +27,19 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
|
||||
descCh <- activeUsersDesc
|
||||
descCh <- limitUsersDesc
|
||||
descCh <- userLimitEnabledDesc
|
||||
descCh <- licenseWarningsDesc
|
||||
descCh <- licenseErrorsDesc
|
||||
}
|
||||
|
||||
func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
|
||||
// Collect user limit metrics.
|
||||
mc.collectUserLimit(metricsCh)
|
||||
|
||||
// Collect license warnings and errors metrics.
|
||||
mc.collectWarningsAndErrors(metricsCh)
|
||||
}
|
||||
|
||||
func (mc *MetricsCollector) collectUserLimit(metricsCh chan<- prometheus.Metric) {
|
||||
userLimitEntitlement, ok := mc.Entitlements.Feature(codersdk.FeatureUserLimit)
|
||||
if !ok {
|
||||
return
|
||||
@@ -45,3 +59,11 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
|
||||
metricsCh <- prometheus.MustNewConstMetric(limitUsersDesc, prometheus.GaugeValue, float64(*userLimitEntitlement.Limit))
|
||||
}
|
||||
}
|
||||
|
||||
func (mc *MetricsCollector) collectWarningsAndErrors(metricsCh chan<- prometheus.Metric) {
|
||||
warnings := mc.Entitlements.Warnings()
|
||||
errors := mc.Entitlements.Errors()
|
||||
|
||||
metricsCh <- prometheus.MustNewConstMetric(licenseWarningsDesc, prometheus.GaugeValue, float64(len(warnings)))
|
||||
metricsCh <- prometheus.MustNewConstMetric(licenseErrorsDesc, prometheus.GaugeValue, float64(len(errors)))
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
|
||||
"github.com/aws/smithy-go/ptr"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
prometheus_client "github.com/prometheus/client_model/go"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/coder/coder/v2/coderd/entitlements"
|
||||
@@ -48,16 +49,131 @@ func TestCollectLicenseMetrics(t *testing.T) {
|
||||
err = json.Unmarshal(goldenFile, &golden)
|
||||
require.NoError(t, err)
|
||||
|
||||
collected := map[string]int{}
|
||||
for _, metric := range metrics {
|
||||
switch metric.GetName() {
|
||||
case "coderd_license_active_users", "coderd_license_limit_users", "coderd_license_user_limit_enabled":
|
||||
for _, m := range metric.Metric {
|
||||
collected[metric.GetName()] = int(m.Gauge.GetValue())
|
||||
for name, expected := range golden {
|
||||
actual, ok := findMetric(metrics, name)
|
||||
require.True(t, ok, "metric %s not found", name)
|
||||
require.Equal(t, expected, actual, "metric %s", name)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCollectLicenseMetrics_WarningsAndErrors(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("NoWarningsOrErrors", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
registry := prometheus.NewRegistry()
|
||||
var sut license.MetricsCollector
|
||||
sut.Entitlements = entitlements.New()
|
||||
|
||||
registry.Register(&sut)
|
||||
|
||||
metrics, err := registry.Gather()
|
||||
require.NoError(t, err)
|
||||
|
||||
warnings, ok := findMetric(metrics, "coderd_license_warnings")
|
||||
require.True(t, ok)
|
||||
require.Zero(t, warnings)
|
||||
|
||||
errors, ok := findMetric(metrics, "coderd_license_errors")
|
||||
require.True(t, ok)
|
||||
require.Zero(t, errors)
|
||||
})
|
||||
|
||||
t.Run("WithWarnings", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
registry := prometheus.NewRegistry()
|
||||
var sut license.MetricsCollector
|
||||
sut.Entitlements = entitlements.New()
|
||||
sut.Entitlements.Modify(func(entitlements *codersdk.Entitlements) {
|
||||
entitlements.Warnings = []string{
|
||||
"License expires in 30 days",
|
||||
"User limit is at 90% capacity",
|
||||
}
|
||||
})
|
||||
|
||||
registry.Register(&sut)
|
||||
|
||||
metrics, err := registry.Gather()
|
||||
require.NoError(t, err)
|
||||
|
||||
warnings, ok := findMetric(metrics, "coderd_license_warnings")
|
||||
require.True(t, ok)
|
||||
require.Equal(t, 2, warnings)
|
||||
|
||||
errors, ok := findMetric(metrics, "coderd_license_errors")
|
||||
require.True(t, ok)
|
||||
require.Zero(t, errors)
|
||||
})
|
||||
|
||||
t.Run("WithErrors", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
registry := prometheus.NewRegistry()
|
||||
var sut license.MetricsCollector
|
||||
sut.Entitlements = entitlements.New()
|
||||
sut.Entitlements.Modify(func(entitlements *codersdk.Entitlements) {
|
||||
entitlements.Errors = []string{
|
||||
"License has expired",
|
||||
}
|
||||
})
|
||||
|
||||
registry.Register(&sut)
|
||||
|
||||
metrics, err := registry.Gather()
|
||||
require.NoError(t, err)
|
||||
|
||||
warnings, ok := findMetric(metrics, "coderd_license_warnings")
|
||||
require.True(t, ok)
|
||||
require.Zero(t, warnings)
|
||||
|
||||
errors, ok := findMetric(metrics, "coderd_license_errors")
|
||||
require.True(t, ok)
|
||||
require.Equal(t, 1, errors)
|
||||
})
|
||||
|
||||
t.Run("WithBothWarningsAndErrors", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
registry := prometheus.NewRegistry()
|
||||
var sut license.MetricsCollector
|
||||
sut.Entitlements = entitlements.New()
|
||||
sut.Entitlements.Modify(func(entitlements *codersdk.Entitlements) {
|
||||
entitlements.Warnings = []string{
|
||||
"License expires in 7 days",
|
||||
"User limit is at 95% capacity",
|
||||
"Feature X is deprecated",
|
||||
}
|
||||
entitlements.Errors = []string{
|
||||
"Invalid license signature",
|
||||
"License UUID mismatch",
|
||||
}
|
||||
})
|
||||
|
||||
registry.Register(&sut)
|
||||
|
||||
metrics, err := registry.Gather()
|
||||
require.NoError(t, err)
|
||||
|
||||
warnings, ok := findMetric(metrics, "coderd_license_warnings")
|
||||
require.True(t, ok)
|
||||
require.Equal(t, 3, warnings)
|
||||
|
||||
errors, ok := findMetric(metrics, "coderd_license_errors")
|
||||
require.True(t, ok)
|
||||
require.Equal(t, 2, errors)
|
||||
})
|
||||
}
|
||||
|
||||
// findMetric searches for a metric by name and returns its value.
|
||||
func findMetric(metrics []*prometheus_client.MetricFamily, name string) (int, bool) {
|
||||
for _, metric := range metrics {
|
||||
if metric.GetName() == name {
|
||||
for _, m := range metric.Metric {
|
||||
return int(m.Gauge.GetValue()), true
|
||||
}
|
||||
default:
|
||||
require.FailNowf(t, "unexpected metric collected", "metric: %s", metric.GetName())
|
||||
}
|
||||
}
|
||||
require.EqualValues(t, golden, collected)
|
||||
return 0, false
|
||||
}
|
||||
|
||||
+3
-1
@@ -1,5 +1,7 @@
|
||||
{
|
||||
"coderd_license_active_users": 4,
|
||||
"coderd_license_limit_users": 7,
|
||||
"coderd_license_user_limit_enabled": 1
|
||||
"coderd_license_user_limit_enabled": 1,
|
||||
"coderd_license_warnings": 0,
|
||||
"coderd_license_errors": 0
|
||||
}
|
||||
|
||||
@@ -978,3 +978,9 @@ coderd_agentapi_metadata_flushed_total 71
|
||||
# HELP coderd_agentapi_metadata_publish_errors_total Total number of metadata batch pubsub publish calls that have resulted in an error.
|
||||
# TYPE coderd_agentapi_metadata_publish_errors_total counter
|
||||
coderd_agentapi_metadata_publish_errors_total 0
|
||||
# HELP coderd_license_warnings The number of active license warnings.
|
||||
# TYPE coderd_license_warnings gauge
|
||||
coderd_license_warnings 0
|
||||
# HELP coderd_license_errors The number of active license errors.
|
||||
# TYPE coderd_license_errors gauge
|
||||
coderd_license_errors 0
|
||||
|
||||
Reference in New Issue
Block a user