From 533c6dcbbec017da27f239e86518c479471edc2f Mon Sep 17 00:00:00 2001 From: Ethan <39577870+ethanndickson@users.noreply.github.com> Date: Fri, 6 Jun 2025 19:28:10 +1000 Subject: [PATCH] fix: remove error log when notification manager is already closed (#18264) Closes https://github.com/coder/internal/issues/677 Resolves flakes such as: ``` $ go test -race -run "TestRunStopRace" github.com/coder/coder/v2/coderd/notifications -count=10000 -parallel $(nproc) --- FAIL: TestRunStopRace (0.00s) t.go:106: 2025-06-06 02:44:39.348 [debu] notifications-manager: notification manager started t.go:106: 2025-06-06 02:44:39.348 [debu] notifications-manager: graceful stop requested t.go:106: 2025-06-06 02:44:39.348 [debu] notifications-manager: notification manager stopped t.go:115: 2025-06-06 02:44:39.348 [erro] notifications-manager: notification manager stopped with error ... error= manager already closed: github.com/coder/coder/v2/coderd/notifications.(*Manager).loop /home/coder/coder/coderd/notifications/manager.go:166 *** slogtest: log detected at level ERROR; TEST FAILURE *** --- FAIL: TestRunStopRace (0.00s) t.go:106: 2025-06-06 02:44:41.632 [debu] notifications-manager: notification manager started t.go:106: 2025-06-06 02:44:41.632 [debu] notifications-manager: graceful stop requested t.go:106: 2025-06-06 02:44:41.632 [debu] notifications-manager: notification manager stopped t.go:115: 2025-06-06 02:44:41.633 [erro] notifications-manager: notification manager stopped with error ... error= manager already closed: github.com/coder/coder/v2/coderd/notifications.(*Manager).loop /home/coder/coder/coderd/notifications/manager.go:166 *** slogtest: log detected at level ERROR; TEST FAILURE *** FAIL FAIL github.com/coder/coder/v2/coderd/notifications 6.847s FAIL ``` These error logs are caused as a result of the `Manager` `Run` start operation being asynchronous. In the flaking test case we immediately call `Stop` after `Run`. It's possible for `Stop` to be scheduled to completion before the goroutine spawned by `Run` calls `loop` and checks `closed`. If this happens, `loop` returns an error and produces the error log. We'll address this by replacing this specific error log with a warning log. ``` $ go test -run "TestRunStopRace" github.com/coder/coder/v2/coderd/notifications -count=10000 -parallel $(nproc) ok github.com/coder/coder/v2/coderd/notifications 1.294s $ go test -race github.com/coder/coder/v2/coderd/notifications -count=100 -parallel $(nproc) ok github.com/coder/coder/v2/coderd/notifications 26.525s ``` --- coderd/notifications/manager.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/coderd/notifications/manager.go b/coderd/notifications/manager.go index 1a2c418a01..11588a09fb 100644 --- a/coderd/notifications/manager.go +++ b/coderd/notifications/manager.go @@ -135,6 +135,8 @@ func (m *Manager) WithHandlers(reg map[database.NotificationMethod]Handler) { m.handlers = reg } +var ErrManagerAlreadyClosed = xerrors.New("manager already closed") + // Run initiates the control loop in the background, which spawns a given number of notifier goroutines. // Manager requires system-level permissions to interact with the store. // Run is only intended to be run once. @@ -146,7 +148,11 @@ func (m *Manager) Run(ctx context.Context) { go func() { err := m.loop(ctx) if err != nil { - m.log.Error(ctx, "notification manager stopped with error", slog.Error(err)) + if xerrors.Is(err, ErrManagerAlreadyClosed) { + m.log.Warn(ctx, "notification manager stopped with error", slog.Error(err)) + } else { + m.log.Error(ctx, "notification manager stopped with error", slog.Error(err)) + } } }() }) @@ -163,7 +169,7 @@ func (m *Manager) loop(ctx context.Context) error { m.mu.Lock() if m.closed { m.mu.Unlock() - return xerrors.New("manager already closed") + return ErrManagerAlreadyClosed } var eg errgroup.Group