chore: add tx metrics and logs for serialization errors (#15215)

Before db_metrics were all or nothing. Now `InTx` metrics are always recorded, and query metrics are opt in.


Adds instrumentation & logging around serialization failures in the database.
This commit is contained in:
Steven Masley
2024-10-25 12:14:15 -04:00
committed by GitHub
parent df34858c3c
commit ccfffc6911
32 changed files with 3123 additions and 2800 deletions
+3 -1
View File
@@ -718,7 +718,9 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
} }
if options.DeploymentValues.Prometheus.Enable && options.DeploymentValues.Prometheus.CollectDBMetrics { if options.DeploymentValues.Prometheus.Enable && options.DeploymentValues.Prometheus.CollectDBMetrics {
options.Database = dbmetrics.New(options.Database, options.PrometheusRegistry) options.Database = dbmetrics.NewQueryMetrics(options.Database, options.Logger, options.PrometheusRegistry)
} else {
options.Database = dbmetrics.NewDBMetrics(options.Database, options.Logger, options.PrometheusRegistry)
} }
var deploymentID string var deploymentID string
+3 -1
View File
@@ -145,7 +145,9 @@ INTROSPECTION / PROMETHEUS OPTIONS:
Collect agent stats (may increase charges for metrics storage). Collect agent stats (may increase charges for metrics storage).
--prometheus-collect-db-metrics bool, $CODER_PROMETHEUS_COLLECT_DB_METRICS (default: false) --prometheus-collect-db-metrics bool, $CODER_PROMETHEUS_COLLECT_DB_METRICS (default: false)
Collect database metrics (may increase charges for metrics storage). Collect database query metrics (may increase charges for metrics
storage). If set to false, a reduced set of database metrics are still
collected.
--prometheus-enable bool, $CODER_PROMETHEUS_ENABLE --prometheus-enable bool, $CODER_PROMETHEUS_ENABLE
Serve prometheus metrics on the address defined by prometheus address. Serve prometheus metrics on the address defined by prometheus address.
+2 -1
View File
@@ -197,7 +197,8 @@ introspection:
- template_name - template_name
- username - username
- workspace_name - workspace_name
# Collect database metrics (may increase charges for metrics storage). # Collect database query metrics (may increase charges for metrics storage). If
# set to false, a reduced set of database metrics are still collected.
# (default: false, type: bool) # (default: false, type: bool)
collect_db_metrics: false collect_db_metrics: false
pprof: pprof:
+4 -1
View File
@@ -285,7 +285,10 @@ func (e *Executor) runOnce(t time.Time) Stats {
// Run with RepeatableRead isolation so that the build process sees the same data // Run with RepeatableRead isolation so that the build process sees the same data
// as our calculation that determines whether an autobuild is necessary. // as our calculation that determines whether an autobuild is necessary.
}, &sql.TxOptions{Isolation: sql.LevelRepeatableRead}) }, &database.TxOptions{
Isolation: sql.LevelRepeatableRead,
TxIdentifier: "lifecycle",
})
if auditLog != nil { if auditLog != nil {
// If the transition didn't succeed then updating the workspace // If the transition didn't succeed then updating the workspace
// to indicate dormant didn't either. // to indicate dormant didn't either.
+3
View File
@@ -0,0 +1,3 @@
// Package promhelp provides helper functions for asserting Prometheus
// metric values in unit tests.
package promhelp
+87
View File
@@ -0,0 +1,87 @@
package promhelp
import (
"context"
"io"
"maps"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
ptestutil "github.com/prometheus/client_golang/prometheus/testutil"
io_prometheus_client "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/require"
)
// RegistryDump returns the http page for a given registry's metrics.
// Very useful for visual debugging.
func RegistryDump(reg *prometheus.Registry) string {
h := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
rec := httptest.NewRecorder()
req, _ := http.NewRequestWithContext(context.Background(), http.MethodGet, "/", nil)
h.ServeHTTP(rec, req)
resp := rec.Result()
data, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return string(data)
}
// Compare can be used to compare a registry to some prometheus formatted
// text. If any values differ, an error is returned.
// If metric names are passed in, only those metrics will be compared.
// Usage: `Compare(reg, RegistryDump(reg))`
func Compare(reg prometheus.Gatherer, compare string, metricNames ...string) error {
return ptestutil.GatherAndCompare(reg, strings.NewReader(compare), metricNames...)
}
// HistogramValue returns the value of a histogram metric with the given name and labels.
func HistogramValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) *io_prometheus_client.Histogram {
t.Helper()
labeled := MetricValue(t, reg, metricName, labels)
require.NotNilf(t, labeled, "metric %q with labels %v not found", metricName, labels)
return labeled.GetHistogram()
}
// GaugeValue returns the value of a gauge metric with the given name and labels.
func GaugeValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) int {
t.Helper()
labeled := MetricValue(t, reg, metricName, labels)
require.NotNilf(t, labeled, "metric %q with labels %v not found", metricName, labels)
return int(labeled.GetGauge().GetValue())
}
// CounterValue returns the value of a counter metric with the given name and labels.
func CounterValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) int {
t.Helper()
labeled := MetricValue(t, reg, metricName, labels)
require.NotNilf(t, labeled, "metric %q with labels %v not found", metricName, labels)
return int(labeled.GetCounter().GetValue())
}
func MetricValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) *io_prometheus_client.Metric {
t.Helper()
metrics, err := reg.Gather()
require.NoError(t, err)
for _, m := range metrics {
if m.GetName() == metricName {
for _, labeled := range m.GetMetric() {
mLabels := make(prometheus.Labels)
for _, v := range labeled.GetLabel() {
mLabels[v.GetName()] = v.GetValue()
}
if maps.Equal(mLabels, labels) {
return labeled
}
}
}
}
return nil
}
+3 -2
View File
@@ -161,8 +161,9 @@ func (k *rotator) rotateKeys(ctx context.Context) error {
} }
} }
return nil return nil
}, &sql.TxOptions{ }, &database.TxOptions{
Isolation: sql.LevelRepeatableRead, Isolation: sql.LevelRepeatableRead,
TxIdentifier: "rotate_keys",
}) })
} }
+61 -8
View File
@@ -28,7 +28,7 @@ type Store interface {
wrapper wrapper
Ping(ctx context.Context) (time.Duration, error) Ping(ctx context.Context) (time.Duration, error)
InTx(func(Store) error, *sql.TxOptions) error InTx(func(Store) error, *TxOptions) error
} }
type wrapper interface { type wrapper interface {
@@ -57,6 +57,43 @@ func New(sdb *sql.DB) Store {
} }
} }
// TxOptions is used to pass some execution metadata to the callers.
// Ideally we could throw this into a context, but no context is used for
// transactions. So instead, the return context is attached to the options
// passed in.
// This metadata should not be returned in the method signature, because it
// is only used for metric tracking. It should never be used by business logic.
type TxOptions struct {
// Isolation is the transaction isolation level.
// If zero, the driver or database's default level is used.
Isolation sql.IsolationLevel
ReadOnly bool
// -- Coder specific metadata --
// TxIdentifier is a unique identifier for the transaction to be used
// in metrics. Can be any string.
TxIdentifier string
// Set by InTx
executionCount int
}
// IncrementExecutionCount is a helper function for external packages
// to increment the unexported count.
// Mainly for `dbmem`.
func IncrementExecutionCount(opts *TxOptions) {
opts.executionCount++
}
func (o TxOptions) ExecutionCount() int {
return o.executionCount
}
func (o *TxOptions) WithID(id string) *TxOptions {
o.TxIdentifier = id
return o
}
// queries encompasses both are sqlc generated // queries encompasses both are sqlc generated
// queries and our custom queries. // queries and our custom queries.
type querier interface { type querier interface {
@@ -80,11 +117,24 @@ func (q *sqlQuerier) Ping(ctx context.Context) (time.Duration, error) {
return time.Since(start), err return time.Since(start), err
} }
func (q *sqlQuerier) InTx(function func(Store) error, txOpts *sql.TxOptions) error { func DefaultTXOptions() *TxOptions {
return &TxOptions{
Isolation: sql.LevelDefault,
ReadOnly: false,
}
}
func (q *sqlQuerier) InTx(function func(Store) error, txOpts *TxOptions) error {
_, inTx := q.db.(*sqlx.Tx) _, inTx := q.db.(*sqlx.Tx)
isolation := sql.LevelDefault
if txOpts != nil { if txOpts == nil {
isolation = txOpts.Isolation // create a default txOpts if left to nil
txOpts = DefaultTXOptions()
}
sqlOpts := &sql.TxOptions{
Isolation: txOpts.Isolation,
ReadOnly: txOpts.ReadOnly,
} }
// If we are not already in a transaction, and we are running in serializable // If we are not already in a transaction, and we are running in serializable
@@ -92,13 +142,14 @@ func (q *sqlQuerier) InTx(function func(Store) error, txOpts *sql.TxOptions) err
// prepared to allow retries if using serializable mode. // prepared to allow retries if using serializable mode.
// If we are in a transaction already, the parent InTx call will handle the retry. // If we are in a transaction already, the parent InTx call will handle the retry.
// We do not want to duplicate those retries. // We do not want to duplicate those retries.
if !inTx && isolation == sql.LevelSerializable { if !inTx && sqlOpts.Isolation == sql.LevelSerializable {
// This is an arbitrarily chosen number. // This is an arbitrarily chosen number.
const retryAmount = 3 const retryAmount = 3
var err error var err error
attempts := 0 attempts := 0
for attempts = 0; attempts < retryAmount; attempts++ { for attempts = 0; attempts < retryAmount; attempts++ {
err = q.runTx(function, txOpts) txOpts.executionCount++
err = q.runTx(function, sqlOpts)
if err == nil { if err == nil {
// Transaction succeeded. // Transaction succeeded.
return nil return nil
@@ -111,7 +162,9 @@ func (q *sqlQuerier) InTx(function func(Store) error, txOpts *sql.TxOptions) err
// Transaction kept failing in serializable mode. // Transaction kept failing in serializable mode.
return xerrors.Errorf("transaction failed after %d attempts: %w", attempts, err) return xerrors.Errorf("transaction failed after %d attempts: %w", attempts, err)
} }
return q.runTx(function, txOpts)
txOpts.executionCount++
return q.runTx(function, sqlOpts)
} }
// InTx performs database operations inside a transaction. // InTx performs database operations inside a transaction.
+1 -1
View File
@@ -27,7 +27,7 @@ func TestSerializedRetry(t *testing.T) {
db := database.New(sqlDB) db := database.New(sqlDB)
called := 0 called := 0
txOpts := &sql.TxOptions{Isolation: sql.LevelSerializable} txOpts := &database.TxOptions{Isolation: sql.LevelSerializable}
err := db.InTx(func(tx database.Store) error { err := db.InTx(func(tx database.Store) error {
// Test nested error // Test nested error
return tx.InTx(func(tx database.Store) error { return tx.InTx(func(tx database.Store) error {
+1 -1
View File
@@ -558,7 +558,7 @@ func (q *querier) Ping(ctx context.Context) (time.Duration, error) {
} }
// InTx runs the given function in a transaction. // InTx runs the given function in a transaction.
func (q *querier) InTx(function func(querier database.Store) error, txOpts *sql.TxOptions) error { func (q *querier) InTx(function func(querier database.Store) error, txOpts *database.TxOptions) error {
return q.db.InTx(func(tx database.Store) error { return q.db.InTx(func(tx database.Store) error {
// Wrap the transaction store in a querier. // Wrap the transaction store in a querier.
wrapped := New(tx, q.auth, q.log, q.acs) wrapped := New(tx, q.auth, q.log, q.acs)
+4 -1
View File
@@ -365,7 +365,7 @@ func (tx *fakeTx) releaseLocks() {
} }
// InTx doesn't rollback data properly for in-memory yet. // InTx doesn't rollback data properly for in-memory yet.
func (q *FakeQuerier) InTx(fn func(database.Store) error, _ *sql.TxOptions) error { func (q *FakeQuerier) InTx(fn func(database.Store) error, opts *database.TxOptions) error {
q.mutex.Lock() q.mutex.Lock()
defer q.mutex.Unlock() defer q.mutex.Unlock()
tx := &fakeTx{ tx := &fakeTx{
@@ -374,6 +374,9 @@ func (q *FakeQuerier) InTx(fn func(database.Store) error, _ *sql.TxOptions) erro
} }
defer tx.releaseLocks() defer tx.releaseLocks()
if opts != nil {
database.IncrementExecutionCount(opts)
}
return fn(tx) return fn(tx)
} }
File diff suppressed because it is too large Load Diff
+109
View File
@@ -0,0 +1,109 @@
package dbmetrics_test
import (
"bytes"
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/require"
"golang.org/x/xerrors"
"cdr.dev/slog"
"cdr.dev/slog/sloggers/sloghuman"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/coderdtest/promhelp"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbmem"
"github.com/coder/coder/v2/coderd/database/dbmetrics"
)
func TestInTxMetrics(t *testing.T) {
t.Parallel()
successLabels := prometheus.Labels{
"success": "true",
"id": "",
}
const inTxHistMetricName = "coderd_db_tx_duration_seconds"
const inTxCountMetricName = "coderd_db_tx_executions_count"
t.Run("QueryMetrics", func(t *testing.T) {
t.Parallel()
db := dbmem.New()
reg := prometheus.NewRegistry()
db = dbmetrics.NewQueryMetrics(db, slogtest.Make(t, nil), reg)
err := db.InTx(func(s database.Store) error {
return nil
}, nil)
require.NoError(t, err)
// Check that the metrics are registered
inTxMetric := promhelp.HistogramValue(t, reg, inTxHistMetricName, successLabels)
require.NotNil(t, inTxMetric)
require.Equal(t, uint64(1), inTxMetric.GetSampleCount())
})
t.Run("DBMetrics", func(t *testing.T) {
t.Parallel()
db := dbmem.New()
reg := prometheus.NewRegistry()
db = dbmetrics.NewDBMetrics(db, slogtest.Make(t, nil), reg)
err := db.InTx(func(s database.Store) error {
return nil
}, nil)
require.NoError(t, err)
// Check that the metrics are registered
inTxMetric := promhelp.HistogramValue(t, reg, inTxHistMetricName, successLabels)
require.NotNil(t, inTxMetric)
require.Equal(t, uint64(1), inTxMetric.GetSampleCount())
})
// Test log output and metrics on failures
// Log example:
// [erro] database transaction hit serialization error and had to retry success=false executions=2 id=foobar_factory
t.Run("SerializationError", func(t *testing.T) {
t.Parallel()
var output bytes.Buffer
logger := slog.Make(sloghuman.Sink(&output))
reg := prometheus.NewRegistry()
db := dbmetrics.NewDBMetrics(dbmem.New(), logger, reg)
const id = "foobar_factory"
txOpts := database.DefaultTXOptions().WithID(id)
database.IncrementExecutionCount(txOpts) // 2 executions
err := db.InTx(func(s database.Store) error {
return xerrors.Errorf("some dumb error")
}, txOpts)
require.Error(t, err)
// Check that the metrics are registered
inTxHistMetric := promhelp.HistogramValue(t, reg, inTxHistMetricName, prometheus.Labels{
"success": "false",
"id": id,
})
require.NotNil(t, inTxHistMetric)
require.Equal(t, uint64(1), inTxHistMetric.GetSampleCount())
inTxCountMetric := promhelp.CounterValue(t, reg, inTxCountMetricName, prometheus.Labels{
"success": "false",
"retries": "1",
"id": id,
})
require.NotNil(t, inTxCountMetric)
require.Equal(t, 1, inTxCountMetric)
// Also check the logs
require.Contains(t, output.String(), "some dumb error")
require.Contains(t, output.String(), "database transaction hit serialization error and had to retry")
require.Contains(t, output.String(), "success=false")
require.Contains(t, output.String(), "executions=2")
require.Contains(t, output.String(), "id="+id)
})
}
File diff suppressed because it is too large Load Diff
+1 -2
View File
@@ -11,7 +11,6 @@ package dbmock
import ( import (
context "context" context "context"
sql "database/sql"
reflect "reflect" reflect "reflect"
time "time" time "time"
@@ -3489,7 +3488,7 @@ func (mr *MockStoreMockRecorder) GetWorkspacesEligibleForTransition(arg0, arg1 a
} }
// InTx mocks base method. // InTx mocks base method.
func (m *MockStore) InTx(arg0 func(database.Store) error, arg1 *sql.TxOptions) error { func (m *MockStore) InTx(arg0 func(database.Store) error, arg1 *database.TxOptions) error {
m.ctrl.T.Helper() m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "InTx", arg0, arg1) ret := m.ctrl.Call(m, "InTx", arg0, arg1)
ret0, _ := ret[0].(error) ret0, _ := ret[0].(error)
+1 -1
View File
@@ -66,7 +66,7 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, clk quartz.
logger.Info(ctx, "purged old database entries", slog.F("duration", clk.Since(start))) logger.Info(ctx, "purged old database entries", slog.F("duration", clk.Since(start)))
return nil return nil
}, nil); err != nil { }, database.DefaultTXOptions().WithID("db_purge")); err != nil {
logger.Error(ctx, "failed to purge old database entries", slog.Error(err)) logger.Error(ctx, "failed to purge old database entries", slog.Error(err))
return return
} }
+1 -1
View File
@@ -108,7 +108,7 @@ func (r *Rolluper) start(ctx context.Context) {
ev.TemplateUsageStats = true ev.TemplateUsageStats = true
return tx.UpsertTemplateUsageStats(ctx) return tx.UpsertTemplateUsageStats(ctx)
}, nil) }, database.DefaultTXOptions().WithID("db_rollup"))
}) })
err := eg.Wait() err := eg.Wait()
+1 -1
View File
@@ -38,7 +38,7 @@ type wrapUpsertDB struct {
resume <-chan struct{} resume <-chan struct{}
} }
func (w *wrapUpsertDB) InTx(fn func(database.Store) error, opts *sql.TxOptions) error { func (w *wrapUpsertDB) InTx(fn func(database.Store) error, opts *database.TxOptions) error {
return w.Store.InTx(func(tx database.Store) error { return w.Store.InTx(func(tx database.Store) error {
return fn(&wrapUpsertDB{Store: tx, resume: w.resume}) return fn(&wrapUpsertDB{Store: tx, resume: w.resume})
}, opts) }, opts)
+1 -1
View File
@@ -33,7 +33,7 @@ func ReadModifyUpdate(db Store, f func(tx Store) error,
) error { ) error {
var err error var err error
for retries := 0; retries < maxRetries; retries++ { for retries := 0; retries < maxRetries; retries++ {
err = db.InTx(f, &sql.TxOptions{ err = db.InTx(f, &TxOptions{
Isolation: sql.LevelRepeatableRead, Isolation: sql.LevelRepeatableRead,
}) })
var pqe *pq.Error var pqe *pq.Error
+5 -5
View File
@@ -19,7 +19,7 @@ func TestReadModifyUpdate_OK(t *testing.T) {
mDB := dbmock.NewMockStore(gomock.NewController(t)) mDB := dbmock.NewMockStore(gomock.NewController(t))
mDB.EXPECT(). mDB.EXPECT().
InTx(gomock.Any(), &sql.TxOptions{Isolation: sql.LevelRepeatableRead}). InTx(gomock.Any(), &database.TxOptions{Isolation: sql.LevelRepeatableRead}).
Times(1). Times(1).
Return(nil) Return(nil)
err := database.ReadModifyUpdate(mDB, func(tx database.Store) error { err := database.ReadModifyUpdate(mDB, func(tx database.Store) error {
@@ -34,11 +34,11 @@ func TestReadModifyUpdate_RetryOK(t *testing.T) {
mDB := dbmock.NewMockStore(gomock.NewController(t)) mDB := dbmock.NewMockStore(gomock.NewController(t))
firstUpdate := mDB.EXPECT(). firstUpdate := mDB.EXPECT().
InTx(gomock.Any(), &sql.TxOptions{Isolation: sql.LevelRepeatableRead}). InTx(gomock.Any(), &database.TxOptions{Isolation: sql.LevelRepeatableRead}).
Times(1). Times(1).
Return(&pq.Error{Code: pq.ErrorCode("40001")}) Return(&pq.Error{Code: pq.ErrorCode("40001")})
mDB.EXPECT(). mDB.EXPECT().
InTx(gomock.Any(), &sql.TxOptions{Isolation: sql.LevelRepeatableRead}). InTx(gomock.Any(), &database.TxOptions{Isolation: sql.LevelRepeatableRead}).
After(firstUpdate). After(firstUpdate).
Times(1). Times(1).
Return(nil) Return(nil)
@@ -55,7 +55,7 @@ func TestReadModifyUpdate_HardError(t *testing.T) {
mDB := dbmock.NewMockStore(gomock.NewController(t)) mDB := dbmock.NewMockStore(gomock.NewController(t))
mDB.EXPECT(). mDB.EXPECT().
InTx(gomock.Any(), &sql.TxOptions{Isolation: sql.LevelRepeatableRead}). InTx(gomock.Any(), &database.TxOptions{Isolation: sql.LevelRepeatableRead}).
Times(1). Times(1).
Return(xerrors.New("a bad thing happened")) Return(xerrors.New("a bad thing happened"))
@@ -71,7 +71,7 @@ func TestReadModifyUpdate_TooManyRetries(t *testing.T) {
mDB := dbmock.NewMockStore(gomock.NewController(t)) mDB := dbmock.NewMockStore(gomock.NewController(t))
mDB.EXPECT(). mDB.EXPECT().
InTx(gomock.Any(), &sql.TxOptions{Isolation: sql.LevelRepeatableRead}). InTx(gomock.Any(), &database.TxOptions{Isolation: sql.LevelRepeatableRead}).
Times(5). Times(5).
Return(&pq.Error{Code: pq.ErrorCode("40001")}) Return(&pq.Error{Code: pq.ErrorCode("40001")})
err := database.ReadModifyUpdate(mDB, func(tx database.Store) error { err := database.ReadModifyUpdate(mDB, func(tx database.Store) error {
+1 -2
View File
@@ -2,7 +2,6 @@ package idpsync_test
import ( import (
"context" "context"
"database/sql"
"encoding/json" "encoding/json"
"testing" "testing"
@@ -324,7 +323,7 @@ func TestNoopNoDiff(t *testing.T) {
// and 'UpdateMemberRoles'. // and 'UpdateMemberRoles'.
mDB.EXPECT().InTx( mDB.EXPECT().InTx(
gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(),
).DoAndReturn(func(f func(database.Store) error, _ *sql.TxOptions) error { ).DoAndReturn(func(f func(database.Store) error, _ *database.TxOptions) error {
err := f(mDB) err := f(mDB)
return err return err
}) })
+12 -64
View File
@@ -3,24 +3,19 @@ package promoauth_test
import ( import (
"context" "context"
"fmt" "fmt"
"io"
"net/http" "net/http"
"net/http/httptest"
"net/url" "net/url"
"strings" "strings"
"testing" "testing"
"time" "time"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
ptestutil "github.com/prometheus/client_golang/prometheus/testutil"
io_prometheus_client "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"golang.org/x/exp/maps"
"golang.org/x/oauth2" "golang.org/x/oauth2"
"github.com/coder/coder/v2/coderd/coderdtest/oidctest" "github.com/coder/coder/v2/coderd/coderdtest/oidctest"
"github.com/coder/coder/v2/coderd/coderdtest/promhelp"
"github.com/coder/coder/v2/coderd/externalauth" "github.com/coder/coder/v2/coderd/externalauth"
"github.com/coder/coder/v2/coderd/promoauth" "github.com/coder/coder/v2/coderd/promoauth"
"github.com/coder/coder/v2/testutil" "github.com/coder/coder/v2/testutil"
@@ -34,7 +29,7 @@ func TestInstrument(t *testing.T) {
reg := prometheus.NewRegistry() reg := prometheus.NewRegistry()
t.Cleanup(func() { t.Cleanup(func() {
if t.Failed() { if t.Failed() {
t.Log(registryDump(reg)) t.Log(promhelp.RegistryDump(reg))
} }
}) })
@@ -46,7 +41,7 @@ func TestInstrument(t *testing.T) {
const metricname = "coderd_oauth2_external_requests_total" const metricname = "coderd_oauth2_external_requests_total"
count := func(source string) int { count := func(source string) int {
labels["source"] = source labels["source"] = source
return counterValue(t, reg, "coderd_oauth2_external_requests_total", labels) return promhelp.CounterValue(t, reg, "coderd_oauth2_external_requests_total", labels)
} }
factory := promoauth.NewFactory(reg) factory := promoauth.NewFactory(reg)
@@ -58,7 +53,7 @@ func TestInstrument(t *testing.T) {
} }
// 0 Requests before we start // 0 Requests before we start
require.Nil(t, metricValue(t, reg, metricname, labels), "no metrics at start") require.Nil(t, promhelp.MetricValue(t, reg, metricname, labels), "no metrics at start")
noClientCtx := ctx noClientCtx := ctx
// This should never be done, but promoauth should not break the default client // This should never be done, but promoauth should not break the default client
@@ -94,7 +89,7 @@ func TestInstrument(t *testing.T) {
// Verify the default client was not broken. This check is added because we // Verify the default client was not broken. This check is added because we
// extend the http.DefaultTransport. If a `.Clone()` is not done, this can be // extend the http.DefaultTransport. If a `.Clone()` is not done, this can be
// mis-used. It is cheap to run this quick check. // mis-used. It is cheap to run this quick check.
snapshot := registryDump(reg) snapshot := promhelp.RegistryDump(reg)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, req, err := http.NewRequestWithContext(ctx, http.MethodGet,
must[*url.URL](t)(idp.IssuerURL().Parse("/.well-known/openid-configuration")).String(), nil) must[*url.URL](t)(idp.IssuerURL().Parse("/.well-known/openid-configuration")).String(), nil)
require.NoError(t, err) require.NoError(t, err)
@@ -103,7 +98,7 @@ func TestInstrument(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
_ = resp.Body.Close() _ = resp.Body.Close()
require.NoError(t, compare(reg, snapshot), "http default client corrupted") require.NoError(t, promhelp.Compare(reg, snapshot), "http default client corrupted")
} }
func TestGithubRateLimits(t *testing.T) { func TestGithubRateLimits(t *testing.T) {
@@ -214,37 +209,26 @@ func TestGithubRateLimits(t *testing.T) {
} }
pass := true pass := true
if !c.ExpectNoMetrics { if !c.ExpectNoMetrics {
pass = pass && assert.Equal(t, gaugeValue(t, reg, "coderd_oauth2_external_requests_rate_limit_total", labels), c.Limit, "limit") pass = pass && assert.Equal(t, promhelp.GaugeValue(t, reg, "coderd_oauth2_external_requests_rate_limit_total", labels), c.Limit, "limit")
pass = pass && assert.Equal(t, gaugeValue(t, reg, "coderd_oauth2_external_requests_rate_limit_remaining", labels), c.Remaining, "remaining") pass = pass && assert.Equal(t, promhelp.GaugeValue(t, reg, "coderd_oauth2_external_requests_rate_limit_remaining", labels), c.Remaining, "remaining")
pass = pass && assert.Equal(t, gaugeValue(t, reg, "coderd_oauth2_external_requests_rate_limit_used", labels), c.Used, "used") pass = pass && assert.Equal(t, promhelp.GaugeValue(t, reg, "coderd_oauth2_external_requests_rate_limit_used", labels), c.Used, "used")
if !c.at.IsZero() { if !c.at.IsZero() {
until := c.Reset.Sub(c.at) until := c.Reset.Sub(c.at)
// Float accuracy is not great, so we allow a delta of 2 // Float accuracy is not great, so we allow a delta of 2
pass = pass && assert.InDelta(t, gaugeValue(t, reg, "coderd_oauth2_external_requests_rate_limit_reset_in_seconds", labels), int(until.Seconds()), 2, "reset in") pass = pass && assert.InDelta(t, promhelp.GaugeValue(t, reg, "coderd_oauth2_external_requests_rate_limit_reset_in_seconds", labels), int(until.Seconds()), 2, "reset in")
} }
} else { } else {
pass = pass && assert.Nil(t, metricValue(t, reg, "coderd_oauth2_external_requests_rate_limit_total", labels), "not exists") pass = pass && assert.Nil(t, promhelp.MetricValue(t, reg, "coderd_oauth2_external_requests_rate_limit_total", labels), "not exists")
} }
// Helpful debugging // Helpful debugging
if !pass { if !pass {
t.Log(registryDump(reg)) t.Log(promhelp.RegistryDump(reg))
} }
}) })
} }
} }
func registryDump(reg *prometheus.Registry) string {
h := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
rec := httptest.NewRecorder()
req, _ := http.NewRequestWithContext(context.Background(), http.MethodGet, "/", nil)
h.ServeHTTP(rec, req)
resp := rec.Result()
data, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return string(data)
}
func must[V any](t *testing.T) func(v V, err error) V { func must[V any](t *testing.T) func(v V, err error) V {
return func(v V, err error) V { return func(v V, err error) V {
t.Helper() t.Helper()
@@ -252,39 +236,3 @@ func must[V any](t *testing.T) func(v V, err error) V {
return v return v
} }
} }
func gaugeValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) int {
labeled := metricValue(t, reg, metricName, labels)
require.NotNilf(t, labeled, "metric %q with labels %v not found", metricName, labels)
return int(labeled.GetGauge().GetValue())
}
func counterValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) int {
labeled := metricValue(t, reg, metricName, labels)
require.NotNilf(t, labeled, "metric %q with labels %v not found", metricName, labels)
return int(labeled.GetCounter().GetValue())
}
func compare(reg prometheus.Gatherer, compare string) error {
return ptestutil.GatherAndCompare(reg, strings.NewReader(compare))
}
func metricValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) *io_prometheus_client.Metric {
metrics, err := reg.Gather()
require.NoError(t, err)
for _, m := range metrics {
if m.GetName() == metricName {
for _, labeled := range m.GetMetric() {
mLables := make(prometheus.Labels)
for _, v := range labeled.GetLabel() {
mLables[v.GetName()] = v.GetValue()
}
if maps.Equal(mLables, labels) {
return labeled
}
}
}
}
return nil
}
+1 -1
View File
@@ -467,7 +467,7 @@ func (api *API) postTemplateByOrganization(rw http.ResponseWriter, r *http.Reque
templateVersionAudit.New = newTemplateVersion templateVersionAudit.New = newTemplateVersion
return nil return nil
}, nil) }, database.DefaultTXOptions().WithID("postTemplate"))
if err != nil { if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Internal error inserting template.", Message: "Internal error inserting template.",
+3 -3
View File
@@ -735,9 +735,9 @@ func expectDB(t *testing.T, opts ...txExpect) *dbmock.MockStore {
// we expect to be run in a transaction; we use mTx to record the // we expect to be run in a transaction; we use mTx to record the
// "in transaction" calls. // "in transaction" calls.
mDB.EXPECT().InTx( mDB.EXPECT().InTx(
gomock.Any(), gomock.Eq(&sql.TxOptions{Isolation: sql.LevelRepeatableRead}), gomock.Any(), gomock.Eq(&database.TxOptions{Isolation: sql.LevelRepeatableRead}),
). ).
DoAndReturn(func(f func(database.Store) error, _ *sql.TxOptions) error { DoAndReturn(func(f func(database.Store) error, _ *database.TxOptions) error {
err := f(mTx) err := f(mTx)
return err return err
}) })
@@ -763,7 +763,7 @@ func withTemplate(mTx *dbmock.MockStore) {
// withInTx runs the given functions on the same db mock. // withInTx runs the given functions on the same db mock.
func withInTx(mTx *dbmock.MockStore) { func withInTx(mTx *dbmock.MockStore) {
mTx.EXPECT().InTx(gomock.Any(), gomock.Any()).Times(1).DoAndReturn( mTx.EXPECT().InTx(gomock.Any(), gomock.Any()).Times(1).DoAndReturn(
func(f func(store database.Store) error, _ *sql.TxOptions) error { func(f func(store database.Store) error, _ *database.TxOptions) error {
return f(mTx) return f(mTx)
}, },
) )
+12 -8
View File
@@ -1357,14 +1357,18 @@ when required by your organization's security policy.`,
Default: strings.Join(agentmetrics.LabelAll, ","), Default: strings.Join(agentmetrics.LabelAll, ","),
}, },
{ {
Name: "Prometheus Collect Database Metrics", Name: "Prometheus Collect Database Metrics",
Description: "Collect database metrics (may increase charges for metrics storage).", // Some db metrics like transaction information will still be collected.
Flag: "prometheus-collect-db-metrics", // Query metrics blow up the number of unique time series with labels
Env: "CODER_PROMETHEUS_COLLECT_DB_METRICS", // and can be very expensive. So default to not capturing query metrics.
Value: &c.Prometheus.CollectDBMetrics, Description: "Collect database query metrics (may increase charges for metrics storage). " +
Group: &deploymentGroupIntrospectionPrometheus, "If set to false, a reduced set of database metrics are still collected.",
YAML: "collect_db_metrics", Flag: "prometheus-collect-db-metrics",
Default: "false", Env: "CODER_PROMETHEUS_COLLECT_DB_METRICS",
Value: &c.Prometheus.CollectDBMetrics,
Group: &deploymentGroupIntrospectionPrometheus,
YAML: "collect_db_metrics",
Default: "false",
}, },
// Pprof settings // Pprof settings
{ {
+1 -1
View File
@@ -321,7 +321,7 @@ When collecting agent stats, aggregate metrics by a given set of comma-separated
| YAML | <code>introspection.prometheus.collect_db_metrics</code> | | YAML | <code>introspection.prometheus.collect_db_metrics</code> |
| Default | <code>false</code> | | Default | <code>false</code> |
Collect database metrics (may increase charges for metrics storage). Collect database query metrics (may increase charges for metrics storage). If set to false, a reduced set of database metrics are still collected.
### --pprof-enable ### --pprof-enable
+3 -1
View File
@@ -146,7 +146,9 @@ INTROSPECTION / PROMETHEUS OPTIONS:
Collect agent stats (may increase charges for metrics storage). Collect agent stats (may increase charges for metrics storage).
--prometheus-collect-db-metrics bool, $CODER_PROMETHEUS_COLLECT_DB_METRICS (default: false) --prometheus-collect-db-metrics bool, $CODER_PROMETHEUS_COLLECT_DB_METRICS (default: false)
Collect database metrics (may increase charges for metrics storage). Collect database query metrics (may increase charges for metrics
storage). If set to false, a reduced set of database metrics are still
collected.
--prometheus-enable bool, $CODER_PROMETHEUS_ENABLE --prometheus-enable bool, $CODER_PROMETHEUS_ENABLE
Serve prometheus metrics on the address defined by prometheus address. Serve prometheus metrics on the address defined by prometheus address.
+3 -2
View File
@@ -104,8 +104,9 @@ func (c *committer) CommitQuota(
permit = true permit = true
consumed = newConsumed consumed = newConsumed
return nil return nil
}, &sql.TxOptions{ }, &database.TxOptions{
Isolation: sql.LevelSerializable, Isolation: sql.LevelSerializable,
TxIdentifier: "commit_quota",
}) })
if err != nil { if err != nil {
return nil, err return nil, err
+2 -2
View File
@@ -73,7 +73,7 @@ func Rotate(ctx context.Context, log slog.Logger, sqlDB *sql.DB, ciphers []Ciphe
} }
} }
return nil return nil
}, &sql.TxOptions{ }, &database.TxOptions{
Isolation: sql.LevelRepeatableRead, Isolation: sql.LevelRepeatableRead,
}) })
if err != nil { if err != nil {
@@ -163,7 +163,7 @@ func Decrypt(ctx context.Context, log slog.Logger, sqlDB *sql.DB, ciphers []Ciph
} }
} }
return nil return nil
}, &sql.TxOptions{ }, &database.TxOptions{
Isolation: sql.LevelRepeatableRead, Isolation: sql.LevelRepeatableRead,
}) })
if err != nil { if err != nil {
+2 -2
View File
@@ -60,7 +60,7 @@ type dbCrypt struct {
database.Store database.Store
} }
func (db *dbCrypt) InTx(function func(database.Store) error, txOpts *sql.TxOptions) error { func (db *dbCrypt) InTx(function func(database.Store) error, txOpts *database.TxOptions) error {
return db.Store.InTx(func(s database.Store) error { return db.Store.InTx(func(s database.Store) error {
return function(&dbCrypt{ return function(&dbCrypt{
primaryCipherDigest: db.primaryCipherDigest, primaryCipherDigest: db.primaryCipherDigest,
@@ -445,5 +445,5 @@ func (db *dbCrypt) ensureEncrypted(ctx context.Context) error {
ActiveKeyDigest: db.primaryCipherDigest, ActiveKeyDigest: db.primaryCipherDigest,
Test: testValue, Test: testValue,
}) })
}, &sql.TxOptions{Isolation: sql.LevelRepeatableRead}) }, &database.TxOptions{Isolation: sql.LevelRepeatableRead})
} }
+1 -1
View File
@@ -773,7 +773,7 @@ func TestEncryptDecryptField(t *testing.T) {
func expectInTx(mdb *dbmock.MockStore) *gomock.Call { func expectInTx(mdb *dbmock.MockStore) *gomock.Call {
return mdb.EXPECT().InTx(gomock.Any(), gomock.Any()).Times(1).DoAndReturn( return mdb.EXPECT().InTx(gomock.Any(), gomock.Any()).Times(1).DoAndReturn(
func(f func(store database.Store) error, _ *sql.TxOptions) error { func(f func(store database.Store) error, _ *database.TxOptions) error {
return f(mdb) return f(mdb)
}, },
) )
+1 -1
View File
@@ -60,7 +60,7 @@ func run() error {
return xerrors.Errorf("stub dbmem: %w", err) return xerrors.Errorf("stub dbmem: %w", err)
} }
err = orderAndStubDatabaseFunctions(filepath.Join(databasePath, "dbmetrics", "dbmetrics.go"), "m", "metricsStore", func(params stubParams) string { err = orderAndStubDatabaseFunctions(filepath.Join(databasePath, "dbmetrics", "querymetrics.go"), "m", "queryMetricsStore", func(params stubParams) string {
return fmt.Sprintf(` return fmt.Sprintf(`
start := time.Now() start := time.Now()
%s := m.s.%s(%s) %s := m.s.%s(%s)