fix: resolve Test_batcherFlush/RetriesOnTransientFailure flake (#24112)

fixes https://github.com/coder/internal/issues/1452
This commit is contained in:
Jon Ayers
2026-04-07 13:46:26 -05:00
committed by GitHub
parent c5f1a2fccf
commit 08bd9e672a
2 changed files with 25 additions and 19 deletions
@@ -468,9 +468,10 @@ func (b *DBBatcher) retryLoop() {
func (b *DBBatcher) retryBatch(params database.BatchUpsertConnectionLogsParams) { func (b *DBBatcher) retryBatch(params database.BatchUpsertConnectionLogsParams) {
count := len(params.ID) count := len(params.ID)
for attempt := range maxRetries { for attempt := range maxRetries {
t := time.NewTimer(retryInterval) t := b.clock.NewTimer(retryInterval, "connectionLogBatcher", "retryBackoff")
select { select {
case <-b.ctx.Done(): case <-b.ctx.Done():
t.Stop()
b.shutdownBatch(params) b.shutdownBatch(params)
return return
case <-t.C: case <-t.C:
@@ -355,15 +355,20 @@ func Test_batcherFlush(t *testing.T) {
store := dbmock.NewMockStore(ctrl) store := dbmock.NewMockStore(ctrl)
clock := quartz.NewMock(t) clock := quartz.NewMock(t)
scheduledTrap := clock.Trap().TimerReset("connectionLogBatcher", "scheduledFlush") // Trap the capacity flush (fires when batch reaches maxBatchSize).
defer scheduledTrap.Close() capacityTrap := clock.Trap().TimerReset("connectionLogBatcher", "capacityFlush")
defer capacityTrap.Close()
b := NewDBBatcher(ctx, store, log, WithClock(clock), WithBatchSize(100)) // Trap the retry backoff timer created by retryBatch.
retryTrap := clock.Trap().NewTimer("connectionLogBatcher", "retryBackoff")
defer retryTrap.Close()
// Batch size of 1: consuming the item triggers an immediate
// capacity flush, avoiding the timer/itemCh select race.
b := NewDBBatcher(ctx, store, log, WithClock(clock), WithBatchSize(1))
evt := fakeConnectEvent(uuid.New(), "agent1", uuid.New()) evt := fakeConnectEvent(uuid.New(), "agent1", uuid.New())
// First call (synchronous in flush) fails, then the
// retry worker retries after the backoff and succeeds.
gomock.InOrder( gomock.InOrder(
store.EXPECT(). store.EXPECT().
BatchUpsertConnectionLogs(gomock.Any(), gomock.Any()). BatchUpsertConnectionLogs(gomock.Any(), gomock.Any()).
@@ -380,14 +385,15 @@ func Test_batcherFlush(t *testing.T) {
require.NoError(t, b.Upsert(ctx, evt)) require.NoError(t, b.Upsert(ctx, evt))
// Trigger a scheduled flush while the batcher is still // Item consumed → capacity flush fires → transient error →
// running. The synchronous write fails and queues to // batch queued to retryCh → timer reset trapped.
// retryCh. The retry worker picks it up after a real- capacityTrap.MustWait(ctx).MustRelease(ctx)
// time 1s delay and succeeds.
clock.Advance(defaultFlushInterval).MustWait(ctx) // Retry worker creates a timer — trap it, release, advance.
scheduledTrap.MustWait(ctx).MustRelease(ctx) retryCall := retryTrap.MustWait(ctx)
retryCall.MustRelease(ctx)
clock.Advance(retryInterval).MustWait(ctx)
// Wait for the retry to complete (real-time 1s delay).
require.NoError(t, b.Close()) require.NoError(t, b.Close())
}) })
@@ -400,10 +406,10 @@ func Test_batcherFlush(t *testing.T) {
store := dbmock.NewMockStore(ctrl) store := dbmock.NewMockStore(ctrl)
clock := quartz.NewMock(t) clock := quartz.NewMock(t)
scheduledTrap := clock.Trap().TimerReset("connectionLogBatcher", "scheduledFlush") capacityTrap := clock.Trap().TimerReset("connectionLogBatcher", "capacityFlush")
defer scheduledTrap.Close() defer capacityTrap.Close()
b := NewDBBatcher(ctx, store, log, WithClock(clock), WithBatchSize(100)) b := NewDBBatcher(ctx, store, log, WithClock(clock), WithBatchSize(1))
evt := fakeConnectEvent(uuid.New(), "agent1", uuid.New()) evt := fakeConnectEvent(uuid.New(), "agent1", uuid.New())
@@ -428,10 +434,9 @@ func Test_batcherFlush(t *testing.T) {
}). }).
AnyTimes() AnyTimes()
// Send event and trigger flush — fails, queues. // Send event — capacity flush triggers immediately.
require.NoError(t, b.Upsert(ctx, evt)) require.NoError(t, b.Upsert(ctx, evt))
clock.Advance(defaultFlushInterval).MustWait(ctx) capacityTrap.MustWait(ctx).MustRelease(ctx)
scheduledTrap.MustWait(ctx).MustRelease(ctx)
// Close triggers shutdown. The retry worker drains // Close triggers shutdown. The retry worker drains
// retryCh and writes the batch via writeBatch. // retryCh and writes the batch via writeBatch.