Files
coder/vpn/client.go
T
Ethan 0076e8479f chore(vpn): send ping results over tunnel (#18200)
Closes #17982.

The purpose of this PR is to expose network latency via the API used by Coder Desktop.

This PR has the tunnel ping all known agents every 5 seconds, in order to produce an instance of:
```proto
message LastPing {
	// latency is the RTT of the ping to the agent.
	google.protobuf.Duration latency = 1;
	// did_p2p indicates whether the ping was sent P2P, or over DERP.
	bool did_p2p = 2;
	// preferred_derp is the human readable name of the preferred DERP region,
	// or the region used for the last ping, if it was sent over DERP.
	string preferred_derp = 3;
	// preferred_derp_latency is the last known latency to the preferred DERP
	// region. Unset if the region does not appear in the DERP map.
	optional google.protobuf.Duration preferred_derp_latency = 4;
}
```
The contents of this message are stored and included on all subsequent upsertions of the agent. 
Note that we upsert existing agents every 5 seconds to update the `last_handshake` value.

On the desktop apps, this message will be used to produce a tooltip similar to that of the VS Code extension:
<img width="495" alt="image" src="https://github.com/user-attachments/assets/d8b65f3d-f536-4c64-9af9-35c1a42c92d2" />
(wording not final)

Unlike the VS Code extension, we omit:
- The Latency of *all* available DERP regions. It seems not ideal to send a copy of this entire map for every online agent, and it certainly doesn't make sense for it to be on the `Agent` or `LastPing` message. 
If we do want to expose this info on Coder Desktop, we should consider how best to do so; maybe we want to include it on a more generic `Netcheck` message.
- The current throughput (Bytes up/down). This is out of scope of the linked issue, and is non-trivial to implement. I'm also not sure of the value given the frequency we're doing these status updates (every 5 seconds).
If we want to expose it, it'll be in a separate PR.

<img width="343" alt="image" src="https://github.com/user-attachments/assets/8447d03b-9721-4111-8ac1-332d70a1e8f1" />
2025-06-06 14:18:57 +10:00

196 lines
5.5 KiB
Go

package vpn
import (
"context"
"net/http"
"net/netip"
"net/url"
"time"
"golang.org/x/xerrors"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/dns"
"tailscale.com/net/netmon"
"tailscale.com/tailcfg"
"tailscale.com/wgengine/router"
"github.com/google/uuid"
"github.com/tailscale/wireguard-go/tun"
"cdr.dev/slog"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/workspacesdk"
"github.com/coder/coder/v2/tailnet"
"github.com/coder/coder/v2/tailnet/proto"
"github.com/coder/quartz"
"github.com/coder/websocket"
)
type Conn interface {
CurrentWorkspaceState() (tailnet.WorkspaceUpdate, error)
GetPeerDiagnostics(peerID uuid.UUID) tailnet.PeerDiagnostics
Ping(ctx context.Context, agentID uuid.UUID) (time.Duration, bool, *ipnstate.PingResult, error)
Node() *tailnet.Node
DERPMap() *tailcfg.DERPMap
Close() error
}
type vpnConn struct {
*tailnet.Conn
cancelFn func()
controller *tailnet.Controller
updatesCtrl *tailnet.TunnelAllWorkspaceUpdatesController
}
func (c *vpnConn) Ping(ctx context.Context, agentID uuid.UUID) (time.Duration, bool, *ipnstate.PingResult, error) {
return c.Conn.Ping(ctx, tailnet.TailscaleServicePrefix.AddrFromUUID(agentID))
}
func (c *vpnConn) CurrentWorkspaceState() (tailnet.WorkspaceUpdate, error) {
return c.updatesCtrl.CurrentState()
}
func (c *vpnConn) Close() error {
c.cancelFn()
<-c.controller.Closed()
return c.Conn.Close()
}
type client struct{}
type Client interface {
NewConn(ctx context.Context, serverURL *url.URL, token string, options *Options) (Conn, error)
}
func NewClient() Client {
return &client{}
}
type Options struct {
Headers http.Header
Logger slog.Logger
DNSConfigurator dns.OSConfigurator
Router router.Router
TUNDevice tun.Device
WireguardMonitor *netmon.Monitor
UpdateHandler tailnet.UpdatesHandler
}
func (*client) NewConn(initCtx context.Context, serverURL *url.URL, token string, options *Options) (vpnC Conn, err error) {
if options == nil {
options = &Options{}
}
if options.Headers == nil {
options.Headers = http.Header{}
}
headers := options.Headers
sdk := codersdk.New(serverURL)
sdk.SetSessionToken(token)
sdk.HTTPClient.Transport = &codersdk.HeaderTransport{
Transport: http.DefaultTransport,
Header: headers,
}
// New context, separate from initCtx. We don't want to cancel the
// connection if initCtx is canceled.
ctx, cancel := context.WithCancel(context.Background())
defer func() {
if err != nil {
cancel()
}
}()
rpcURL, err := sdk.URL.Parse("/api/v2/tailnet")
if err != nil {
return nil, xerrors.Errorf("parse rpc url: %w", err)
}
me, err := sdk.User(initCtx, codersdk.Me)
if err != nil {
return nil, xerrors.Errorf("get user: %w", err)
}
connInfo, err := workspacesdk.New(sdk).AgentConnectionInfoGeneric(initCtx)
if err != nil {
return nil, xerrors.Errorf("get connection info: %w", err)
}
// default to DNS suffix of "coder" if the server hasn't set it (might be too old).
dnsNameOptions := tailnet.DNSNameOptions{Suffix: tailnet.CoderDNSSuffix}
dnsMatch := tailnet.CoderDNSSuffix
if connInfo.HostnameSuffix != "" {
dnsNameOptions.Suffix = connInfo.HostnameSuffix
dnsMatch = connInfo.HostnameSuffix
}
headers.Set(codersdk.SessionTokenHeader, token)
dialer := workspacesdk.NewWebsocketDialer(options.Logger, rpcURL, &websocket.DialOptions{
HTTPClient: sdk.HTTPClient,
HTTPHeader: headers,
CompressionMode: websocket.CompressionDisabled,
}, workspacesdk.WithWorkspaceUpdates(&proto.WorkspaceUpdatesRequest{
WorkspaceOwnerId: tailnet.UUIDToByteSlice(me.ID),
}))
ip := tailnet.CoderServicePrefix.RandomAddr()
conn, err := tailnet.NewConn(&tailnet.Options{
Addresses: []netip.Prefix{netip.PrefixFrom(ip, 128)},
DERPMap: connInfo.DERPMap,
DERPHeader: &headers,
DERPForceWebSockets: connInfo.DERPForceWebSockets,
Logger: options.Logger,
BlockEndpoints: connInfo.DisableDirectConnections,
DNSConfigurator: options.DNSConfigurator,
Router: options.Router,
TUNDev: options.TUNDevice,
WireguardMonitor: options.WireguardMonitor,
DNSMatchDomain: dnsMatch,
})
if err != nil {
return nil, xerrors.Errorf("create tailnet: %w", err)
}
defer func() {
if err != nil {
_ = conn.Close()
}
}()
clk := quartz.NewReal()
controller := tailnet.NewController(options.Logger, dialer)
coordCtrl := tailnet.NewTunnelSrcCoordController(options.Logger, conn)
controller.ResumeTokenCtrl = tailnet.NewBasicResumeTokenController(options.Logger, clk)
controller.CoordCtrl = coordCtrl
controller.DERPCtrl = tailnet.NewBasicDERPController(options.Logger, conn)
updatesCtrl := tailnet.NewTunnelAllWorkspaceUpdatesController(
options.Logger,
coordCtrl,
tailnet.WithDNS(conn, me.Username, dnsNameOptions),
tailnet.WithHandler(options.UpdateHandler),
)
controller.WorkspaceUpdatesCtrl = updatesCtrl
controller.Run(ctx)
options.Logger.Debug(ctx, "running tailnet API v2+ connector")
select {
case <-initCtx.Done():
return nil, xerrors.Errorf("timed out waiting for coordinator and derp map: %w", initCtx.Err())
case err = <-dialer.Connected():
if err != nil {
options.Logger.Error(ctx, "failed to connect to tailnet v2+ API", slog.Error(err))
return nil, xerrors.Errorf("start connector: %w", err)
}
options.Logger.Debug(ctx, "connected to tailnet v2+ API")
}
return &vpnConn{
Conn: conn,
cancelFn: cancel,
controller: controller,
updatesCtrl: updatesCtrl,
}, nil
}