mirror of
https://github.com/coder/coder.git
synced 2026-06-03 21:18:24 +00:00
2d7009e50d
## Summary

Removes `time.Sleep` calls in two test files by replacing them with deterministic or event-driven alternatives.

### Changes

**`coderd/provisionerjobs_test.go`** (34.5s → 0.25s)

Replaced `time.Sleep(1500ms)` with a direct SQL `UPDATE` that bumps `created_at` by 2 seconds. The sleep existed purely to ensure different timestamps for sort-order testing. The fix is deterministic and cannot flake. Uses `NewDBWithSQLDB` (the test already required real Postgres via `WithDumpOnFailure`).

**`coderd/database/pubsub/pubsub_test.go`** (2.05s → 1.3s)

Replaced `time.Sleep(1s)` with a `testutil.Eventually` retry loop that publishes and checks for subscriber receipt. This is the idiomatic pattern in the codebase. The old sleep waited for pq.Listener to re-issue LISTEN after reconnect; the new code polls until it actually works.
204 lines
7.4 KiB
Go
204 lines
7.4 KiB
Go
package pubsub_test
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"cdr.dev/slog/v3"
|
|
"cdr.dev/slog/v3/sloggers/slogtest"
|
|
"github.com/coder/coder/v2/coderd/database/dbtestutil"
|
|
"github.com/coder/coder/v2/coderd/database/pubsub"
|
|
"github.com/coder/coder/v2/testutil"
|
|
)
|
|
|
|
func TestPGPubsub_Metrics(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
logger := testutil.Logger(t)
|
|
connectionURL, err := dbtestutil.Open(t)
|
|
require.NoError(t, err)
|
|
db, err := sql.Open("postgres", connectionURL)
|
|
require.NoError(t, err)
|
|
defer db.Close()
|
|
registry := prometheus.NewRegistry()
|
|
ctx := testutil.Context(t, testutil.WaitLong)
|
|
|
|
uut, err := pubsub.New(ctx, logger, db, connectionURL)
|
|
require.NoError(t, err)
|
|
defer uut.Close()
|
|
|
|
err = registry.Register(uut)
|
|
require.NoError(t, err)
|
|
|
|
// each Gather measures pubsub latency by publishing a message & subscribing to it
|
|
var gatherCount float64
|
|
|
|
metrics, err := registry.Gather()
|
|
gatherCount++
|
|
require.NoError(t, err)
|
|
require.True(t, testutil.PromGaugeHasValue(t, metrics, 0, "coder_pubsub_current_events"))
|
|
require.True(t, testutil.PromGaugeHasValue(t, metrics, 0, "coder_pubsub_current_subscribers"))
|
|
|
|
event := "test"
|
|
data := "testing"
|
|
messageChannel := make(chan []byte)
|
|
unsub0, err := uut.Subscribe(event, func(_ context.Context, message []byte) {
|
|
messageChannel <- message
|
|
})
|
|
require.NoError(t, err)
|
|
defer unsub0()
|
|
go func() {
|
|
err := uut.Publish(event, []byte(data))
|
|
assert.NoError(t, err)
|
|
}()
|
|
_ = testutil.TryReceive(ctx, t, messageChannel)
|
|
|
|
require.Eventually(t, func() bool {
|
|
latencyBytes := gatherCount * pubsub.LatencyMessageLength
|
|
metrics, err = registry.Gather()
|
|
gatherCount++
|
|
assert.NoError(t, err)
|
|
return testutil.PromGaugeHasValue(t, metrics, 1, "coder_pubsub_current_events") &&
|
|
testutil.PromGaugeHasValue(t, metrics, 1, "coder_pubsub_current_subscribers") &&
|
|
testutil.PromGaugeHasValue(t, metrics, 1, "coder_pubsub_connected") &&
|
|
testutil.PromCounterHasValue(t, metrics, gatherCount, "coder_pubsub_publishes_total", "true") &&
|
|
testutil.PromCounterHasValue(t, metrics, gatherCount, "coder_pubsub_subscribes_total", "true") &&
|
|
testutil.PromCounterHasValue(t, metrics, gatherCount, "coder_pubsub_messages_total", "normal") &&
|
|
testutil.PromCounterHasValue(t, metrics, float64(len(data))+latencyBytes, "coder_pubsub_received_bytes_total") &&
|
|
testutil.PromCounterHasValue(t, metrics, float64(len(data))+latencyBytes, "coder_pubsub_published_bytes_total") &&
|
|
testutil.PromGaugeAssertion(t, metrics, func(in float64) bool { return in > 0 }, "coder_pubsub_send_latency_seconds") &&
|
|
testutil.PromGaugeAssertion(t, metrics, func(in float64) bool { return in > 0 }, "coder_pubsub_receive_latency_seconds") &&
|
|
testutil.PromCounterHasValue(t, metrics, gatherCount, "coder_pubsub_latency_measures_total") &&
|
|
!testutil.PromCounterGathered(t, metrics, "coder_pubsub_latency_measure_errs_total")
|
|
}, testutil.WaitShort, testutil.IntervalFast)
|
|
|
|
colossalSize := 7600
|
|
colossalData := make([]byte, colossalSize)
|
|
for i := range colossalData {
|
|
colossalData[i] = 'q'
|
|
}
|
|
unsub1, err := uut.Subscribe(event, func(_ context.Context, message []byte) {
|
|
messageChannel <- message
|
|
})
|
|
require.NoError(t, err)
|
|
defer unsub1()
|
|
go func() {
|
|
err := uut.Publish(event, colossalData)
|
|
assert.NoError(t, err)
|
|
}()
|
|
// should get 2 messages because we have 2 subs
|
|
_ = testutil.TryReceive(ctx, t, messageChannel)
|
|
_ = testutil.TryReceive(ctx, t, messageChannel)
|
|
|
|
require.Eventually(t, func() bool {
|
|
latencyBytes := gatherCount * pubsub.LatencyMessageLength
|
|
metrics, err = registry.Gather()
|
|
gatherCount++
|
|
assert.NoError(t, err)
|
|
return testutil.PromGaugeHasValue(t, metrics, 1, "coder_pubsub_current_events") &&
|
|
testutil.PromGaugeHasValue(t, metrics, 2, "coder_pubsub_current_subscribers") &&
|
|
testutil.PromGaugeHasValue(t, metrics, 1, "coder_pubsub_connected") &&
|
|
testutil.PromCounterHasValue(t, metrics, 1+gatherCount, "coder_pubsub_publishes_total", "true") &&
|
|
testutil.PromCounterHasValue(t, metrics, 1+gatherCount, "coder_pubsub_subscribes_total", "true") &&
|
|
testutil.PromCounterHasValue(t, metrics, gatherCount, "coder_pubsub_messages_total", "normal") &&
|
|
testutil.PromCounterHasValue(t, metrics, 1, "coder_pubsub_messages_total", "colossal") &&
|
|
testutil.PromCounterHasValue(t, metrics, float64(colossalSize+len(data))+latencyBytes, "coder_pubsub_received_bytes_total") &&
|
|
testutil.PromCounterHasValue(t, metrics, float64(colossalSize+len(data))+latencyBytes, "coder_pubsub_published_bytes_total") &&
|
|
testutil.PromGaugeAssertion(t, metrics, func(in float64) bool { return in > 0 }, "coder_pubsub_send_latency_seconds") &&
|
|
testutil.PromGaugeAssertion(t, metrics, func(in float64) bool { return in > 0 }, "coder_pubsub_receive_latency_seconds") &&
|
|
testutil.PromCounterHasValue(t, metrics, gatherCount, "coder_pubsub_latency_measures_total") &&
|
|
!testutil.PromCounterGathered(t, metrics, "coder_pubsub_latency_measure_errs_total")
|
|
}, testutil.WaitShort, testutil.IntervalFast)
|
|
}
|
|
|
|
func TestPGPubsubDriver(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctx := testutil.Context(t, testutil.WaitLong)
|
|
logger := slogtest.Make(t, &slogtest.Options{
|
|
IgnoreErrors: true,
|
|
}).Leveled(slog.LevelDebug)
|
|
|
|
connectionURL, err := dbtestutil.Open(t)
|
|
require.NoError(t, err)
|
|
|
|
// use a separate subber and pubber so we can keep track of listener connections
|
|
db, err := sql.Open("postgres", connectionURL)
|
|
require.NoError(t, err)
|
|
defer db.Close()
|
|
pubber, err := pubsub.New(ctx, logger, db, connectionURL)
|
|
require.NoError(t, err)
|
|
defer pubber.Close()
|
|
|
|
// use a connector that sends us the connections for the subber
|
|
subDriver := dbtestutil.NewDriver()
|
|
defer subDriver.Close()
|
|
tconn, err := subDriver.Connector(connectionURL)
|
|
require.NoError(t, err)
|
|
tcdb := sql.OpenDB(tconn)
|
|
defer tcdb.Close()
|
|
subber, err := pubsub.New(ctx, logger, tcdb, connectionURL)
|
|
require.NoError(t, err)
|
|
defer subber.Close()
|
|
|
|
// test that we can publish and subscribe
|
|
gotChan := make(chan struct{}, 1)
|
|
defer close(gotChan)
|
|
subCancel, err := subber.Subscribe("test", func(_ context.Context, _ []byte) {
|
|
select {
|
|
case gotChan <- struct{}{}:
|
|
default:
|
|
}
|
|
})
|
|
require.NoError(t, err)
|
|
defer subCancel()
|
|
|
|
// send a message
|
|
err = pubber.Publish("test", []byte("hello"))
|
|
require.NoError(t, err)
|
|
|
|
// wait for the message
|
|
_ = testutil.TryReceive(ctx, t, gotChan)
|
|
|
|
// read out first connection
|
|
firstConn := testutil.TryReceive(ctx, t, subDriver.Connections)
|
|
|
|
// drop the underlying connection being used by the pubsub
|
|
// the pq.Listener should reconnect and repopulate it's listeners
|
|
// so old subscriptions should still work
|
|
err = firstConn.Close()
|
|
require.NoError(t, err)
|
|
|
|
// wait for the reconnect
|
|
_ = testutil.TryReceive(ctx, t, subDriver.Connections)
|
|
|
|
// The raw connection notification is sent before the
|
|
// pq.Listener re-issues LISTEN on the new connection.
|
|
// Rather than sleeping a fixed duration, retry publishing
|
|
// until the subscriber receives a message, which proves
|
|
// that the LISTEN has been re-established.
|
|
testutil.Eventually(ctx, t, func(_ context.Context) bool {
|
|
// Drain any stale signals before publishing.
|
|
select {
|
|
case <-gotChan:
|
|
default:
|
|
}
|
|
err := pubber.Publish("test", []byte("hello-again"))
|
|
if err != nil {
|
|
return false
|
|
}
|
|
select {
|
|
case <-gotChan:
|
|
return true
|
|
case <-time.After(testutil.IntervalFast):
|
|
return false
|
|
}
|
|
}, testutil.IntervalMedium, "subscriber did not receive message after reconnect")
|
|
}
|