mirror of
https://github.com/coder/coder.git
synced 2026-06-03 04:58:23 +00:00
fix: wait for server tailnet background routines to exit on Close (#15183)
Fixes https://github.com/coder/internal/issues/114. We need to wait for the ServerTailnet goroutines to finish when closing down; otherwise, we can race with the shutdown of coderd and the coordinator, which causes errors.
This commit is contained in:
+22
-9
@@ -91,13 +91,15 @@ func NewServerTailnet(
|
||||
})
|
||||
}
|
||||
|
||||
derpMapUpdaterClosed := make(chan struct{})
|
||||
bgRoutines := &sync.WaitGroup{}
|
||||
originalDerpMap := derpMapFn()
|
||||
// it's important to set the DERPRegionDialer above _before_ we set the DERP map so that if
|
||||
// there is an embedded relay, we use the local in-memory dialer.
|
||||
conn.SetDERPMap(originalDerpMap)
|
||||
bgRoutines.Add(1)
|
||||
go func() {
|
||||
defer close(derpMapUpdaterClosed)
|
||||
defer bgRoutines.Done()
|
||||
defer logger.Debug(ctx, "polling DERPMap exited")
|
||||
|
||||
ticker := time.NewTicker(5 * time.Second)
|
||||
defer ticker.Stop()
|
||||
@@ -120,7 +122,7 @@ func NewServerTailnet(
|
||||
tn := &ServerTailnet{
|
||||
ctx: serverCtx,
|
||||
cancel: cancel,
|
||||
derpMapUpdaterClosed: derpMapUpdaterClosed,
|
||||
bgRoutines: bgRoutines,
|
||||
logger: logger,
|
||||
tracer: traceProvider.Tracer(tracing.TracerName),
|
||||
conn: conn,
|
||||
@@ -170,8 +172,15 @@ func NewServerTailnet(
|
||||
// registering the callback also triggers send of the initial node
|
||||
tn.coordinatee.SetNodeCallback(tn.nodeCallback)
|
||||
|
||||
go tn.watchAgentUpdates()
|
||||
go tn.expireOldAgents()
|
||||
tn.bgRoutines.Add(2)
|
||||
go func() {
|
||||
defer tn.bgRoutines.Done()
|
||||
tn.watchAgentUpdates()
|
||||
}()
|
||||
go func() {
|
||||
defer tn.bgRoutines.Done()
|
||||
tn.expireOldAgents()
|
||||
}()
|
||||
return tn, nil
|
||||
}
|
||||
|
||||
@@ -204,6 +213,7 @@ func (s *ServerTailnet) Collect(metrics chan<- prometheus.Metric) {
|
||||
}
|
||||
|
||||
func (s *ServerTailnet) expireOldAgents() {
|
||||
defer s.logger.Debug(s.ctx, "stopped expiring old agents")
|
||||
const (
|
||||
tick = 5 * time.Minute
|
||||
cutoff = 30 * time.Minute
|
||||
@@ -255,6 +265,7 @@ func (s *ServerTailnet) doExpireOldAgents(cutoff time.Duration) {
|
||||
}
|
||||
|
||||
func (s *ServerTailnet) watchAgentUpdates() {
|
||||
defer s.logger.Debug(s.ctx, "stopped watching agent updates")
|
||||
for {
|
||||
conn := s.getAgentConn()
|
||||
resp, ok := conn.NextUpdate(s.ctx)
|
||||
@@ -317,9 +328,9 @@ func (s *ServerTailnet) reinitCoordinator() {
|
||||
}
|
||||
|
||||
type ServerTailnet struct {
|
||||
ctx context.Context
|
||||
cancel func()
|
||||
derpMapUpdaterClosed chan struct{}
|
||||
ctx context.Context
|
||||
cancel func()
|
||||
bgRoutines *sync.WaitGroup
|
||||
|
||||
logger slog.Logger
|
||||
tracer trace.Tracer
|
||||
@@ -532,10 +543,12 @@ func (c *netConnCloser) Close() error {
|
||||
}
|
||||
|
||||
func (s *ServerTailnet) Close() error {
|
||||
s.logger.Info(s.ctx, "closing server tailnet")
|
||||
defer s.logger.Debug(s.ctx, "server tailnet close complete")
|
||||
s.cancel()
|
||||
_ = s.conn.Close()
|
||||
s.transport.CloseIdleConnections()
|
||||
<-s.derpMapUpdaterClosed
|
||||
s.bgRoutines.Wait()
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -419,6 +419,8 @@ func (s *Server) RegisterNow() error {
|
||||
}
|
||||
|
||||
func (s *Server) Close() error {
|
||||
s.Logger.Info(s.ctx, "closing workspace proxy server")
|
||||
defer s.Logger.Debug(s.ctx, "finished closing workspace proxy server")
|
||||
s.cancel()
|
||||
|
||||
var err error
|
||||
|
||||
Reference in New Issue
Block a user