mirror of
https://github.com/coder/coder.git
synced 2026-06-03 21:18:24 +00:00
2d7dd73106
A cursory glance at Grafana for error-level logs showed that the
following log line was appearing regularly:
```
2026-03-11 05:17:59.169 [erro] coderd: failed to heartbeat ping trace=xxx span=xxx request_id=xxx ...
error= failed to ping:
github.com/coder/coder/v2/coderd/httpapi.pingWithTimeout
/home/runner/work/coder/coder/coderd/httpapi/websocket.go:46
- failed to ping: failed to wait for pong: context canceled
```
This seems to be an "expected" error when the parent context is canceled,
so it doesn't make sense to log it at level ERROR.
NOTE: I also saw this a bit and wonder if it also deserves similar
treatment:
```
2026-03-11 05:10:53.229 [erro] coderd.inbox_notifications_watcher: failed to heartbeat ping trace=xxx span=xxx request_id=xxx ...
error= failed to ping:
github.com/coder/coder/v2/coderd/httpapi.pingWithTimeout
/home/runner/work/coder/coder/coderd/httpapi/websocket.go:46
- failed to ping: failed to write control frame opPing: use of closed network connection
```
54 lines
1.3 KiB
Go
54 lines
1.3 KiB
Go
package httpapi
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"time"
|
|
|
|
"golang.org/x/xerrors"
|
|
|
|
"cdr.dev/slog/v3"
|
|
"github.com/coder/websocket"
|
|
)
|
|
|
|
// HeartbeatInterval is the period between the WebSocket heartbeat pings sent
// by HeartbeatClose. HeartbeatClose also passes it as the timeout for each
// individual ping.
const HeartbeatInterval time.Duration = 15 * time.Second
|
|
|
|
// HeartbeatClose loops to ping a WebSocket to keep it alive. It calls `exit` on ping
|
|
// failure.
|
|
func HeartbeatClose(ctx context.Context, logger slog.Logger, exit func(), conn *websocket.Conn) {
|
|
ticker := time.NewTicker(HeartbeatInterval)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
}
|
|
err := pingWithTimeout(ctx, conn, HeartbeatInterval)
|
|
if err != nil {
|
|
// context.DeadlineExceeded is expected when the client disconnects without sending a close frame.
|
|
// context.Canceled is expected when the request context is canceled.
|
|
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
|
|
logger.Debug(ctx, "heartbeat ping stopped", slog.Error(err))
|
|
} else {
|
|
logger.Error(ctx, "failed to heartbeat ping", slog.Error(err))
|
|
}
|
|
_ = conn.Close(websocket.StatusGoingAway, "Ping failed")
|
|
exit()
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func pingWithTimeout(ctx context.Context, conn *websocket.Conn, timeout time.Duration) error {
|
|
ctx, cancel := context.WithTimeout(ctx, timeout)
|
|
defer cancel()
|
|
err := conn.Ping(ctx)
|
|
if err != nil {
|
|
return xerrors.Errorf("failed to ping: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|