mirror of
https://github.com/coder/coder.git
synced 2026-06-03 21:18:24 +00:00
bddb808b25
Fixes all our Go file imports to match the preferred spec that we've _mostly_ been using. For example: ``` import ( "context" "time" "github.com/prometheus/client_golang/prometheus" "golang.org/x/xerrors" "gopkg.in/natefinch/lumberjack.v2" "cdr.dev/slog/v3" "github.com/coder/coder/v2/codersdk/agentsdk" "github.com/coder/serpent" ) ``` 3 groups: standard library, 3rd partly libs, Coder libs. This PR makes the change across the codebase. The PR in the stack above modifies our formatting to maintain this state of affairs, and is a separate PR so it's possible to review that one in detail.
796 lines
22 KiB
Go
796 lines
22 KiB
Go
package tailnet
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"maps"
|
|
"net/netip"
|
|
"slices"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"go4.org/netipx"
|
|
"tailscale.com/ipn/ipnstate"
|
|
"tailscale.com/net/dns"
|
|
"tailscale.com/tailcfg"
|
|
"tailscale.com/types/dnstype"
|
|
"tailscale.com/types/ipproto"
|
|
"tailscale.com/types/key"
|
|
"tailscale.com/types/netmap"
|
|
"tailscale.com/util/dnsname"
|
|
"tailscale.com/wgengine"
|
|
"tailscale.com/wgengine/filter"
|
|
"tailscale.com/wgengine/router"
|
|
"tailscale.com/wgengine/wgcfg"
|
|
"tailscale.com/wgengine/wgcfg/nmcfg"
|
|
|
|
"cdr.dev/slog/v3"
|
|
"github.com/coder/coder/v2/tailnet/proto"
|
|
"github.com/coder/quartz"
|
|
)
|
|
|
|
const lostTimeout = 15 * time.Minute
|
|
|
|
// CoderDNSSuffix is the default DNS suffix that we append to Coder DNS
|
|
// records.
|
|
const (
|
|
CoderDNSSuffix = "coder"
|
|
CoderDNSSuffixFQDN = dnsname.FQDN(CoderDNSSuffix + ".")
|
|
)
|
|
|
|
// engineConfigurable is the subset of wgengine.Engine that we use for configuration.
|
|
//
|
|
// This allows us to test configuration code without faking the whole interface.
|
|
type engineConfigurable interface {
|
|
UpdateStatus(*ipnstate.StatusBuilder)
|
|
SetNetworkMap(*netmap.NetworkMap)
|
|
Reconfig(*wgcfg.Config, *router.Config, *dns.Config, *tailcfg.Debug) error
|
|
SetDERPMap(*tailcfg.DERPMap)
|
|
SetFilter(*filter.Filter)
|
|
}
|
|
|
|
type phase int
|
|
|
|
const (
|
|
idle phase = iota
|
|
configuring
|
|
closed
|
|
)
|
|
|
|
type phased struct {
|
|
sync.Cond
|
|
phase phase
|
|
}
|
|
|
|
type configMaps struct {
|
|
phased
|
|
netmapDirty bool
|
|
derpMapDirty bool
|
|
filterDirty bool
|
|
closing bool
|
|
|
|
engine engineConfigurable
|
|
static netmap.NetworkMap
|
|
|
|
hosts map[dnsname.FQDN][]netip.Addr
|
|
peers map[uuid.UUID]*peerLifecycle
|
|
addresses []netip.Prefix
|
|
derpMap *tailcfg.DERPMap
|
|
logger slog.Logger
|
|
blockEndpoints bool
|
|
matchDomain dnsname.FQDN
|
|
|
|
// for testing
|
|
clock quartz.Clock
|
|
}
|
|
|
|
func newConfigMaps(
|
|
logger slog.Logger, engine engineConfigurable,
|
|
nodeID tailcfg.NodeID, nodeKey key.NodePrivate, discoKey key.DiscoPublic,
|
|
matchDomain dnsname.FQDN,
|
|
) *configMaps {
|
|
pubKey := nodeKey.Public()
|
|
c := &configMaps{
|
|
phased: phased{Cond: *(sync.NewCond(&sync.Mutex{}))},
|
|
logger: logger,
|
|
engine: engine,
|
|
hosts: make(map[dnsname.FQDN][]netip.Addr),
|
|
static: netmap.NetworkMap{
|
|
SelfNode: &tailcfg.Node{
|
|
ID: nodeID,
|
|
Key: pubKey,
|
|
DiscoKey: discoKey,
|
|
},
|
|
NodeKey: pubKey,
|
|
PrivateKey: nodeKey,
|
|
PacketFilter: []filter.Match{{
|
|
// Allow any protocol!
|
|
IPProto: []ipproto.Proto{ipproto.TCP, ipproto.UDP, ipproto.ICMPv4, ipproto.ICMPv6, ipproto.SCTP},
|
|
// Allow traffic sourced from anywhere.
|
|
Srcs: []netip.Prefix{
|
|
netip.PrefixFrom(netip.AddrFrom4([4]byte{}), 0),
|
|
netip.PrefixFrom(netip.AddrFrom16([16]byte{}), 0),
|
|
},
|
|
// Allow traffic to route anywhere.
|
|
Dsts: []filter.NetPortRange{
|
|
{
|
|
Net: netip.PrefixFrom(netip.AddrFrom4([4]byte{}), 0),
|
|
Ports: filter.PortRange{
|
|
First: 0,
|
|
Last: 65535,
|
|
},
|
|
},
|
|
{
|
|
Net: netip.PrefixFrom(netip.AddrFrom16([16]byte{}), 0),
|
|
Ports: filter.PortRange{
|
|
First: 0,
|
|
Last: 65535,
|
|
},
|
|
},
|
|
},
|
|
Caps: []filter.CapMatch{},
|
|
}},
|
|
},
|
|
peers: make(map[uuid.UUID]*peerLifecycle),
|
|
matchDomain: matchDomain,
|
|
clock: quartz.NewReal(),
|
|
}
|
|
go c.configLoop()
|
|
return c
|
|
}
|
|
|
|
// configLoop waits for the config to be dirty, then reconfigures the engine.
|
|
// It is internal to configMaps
|
|
func (c *configMaps) configLoop() {
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
defer func() {
|
|
c.phase = closed
|
|
c.Broadcast()
|
|
}()
|
|
for {
|
|
for !(c.closing || c.netmapDirty || c.filterDirty || c.derpMapDirty) {
|
|
c.phase = idle
|
|
c.Wait()
|
|
}
|
|
if c.closing {
|
|
c.logger.Debug(context.Background(), "closing configMaps configLoop")
|
|
return
|
|
}
|
|
// queue up the reconfiguration actions we will take while we have
|
|
// the configMaps locked. We will execute them while unlocked to avoid
|
|
// blocking during reconfig.
|
|
actions := make([]func(), 0, 3)
|
|
if c.derpMapDirty {
|
|
derpMap := c.derpMapLocked()
|
|
actions = append(actions, func() {
|
|
c.logger.Debug(context.Background(), "updating engine DERP map", slog.F("derp_map", (*derpMapStringer)(derpMap)))
|
|
c.engine.SetDERPMap(derpMap)
|
|
})
|
|
}
|
|
if c.netmapDirty {
|
|
nm := c.netMapLocked()
|
|
hosts := c.hostsLocked()
|
|
actions = append(actions, func() {
|
|
c.logger.Debug(context.Background(), "updating engine network map", slog.F("network_map", nm))
|
|
c.engine.SetNetworkMap(nm)
|
|
c.reconfig(nm, hosts)
|
|
})
|
|
}
|
|
if c.filterDirty {
|
|
f := c.filterLocked()
|
|
actions = append(actions, func() {
|
|
c.logger.Debug(context.Background(), "updating engine filter", slog.F("filter", f))
|
|
c.engine.SetFilter(f)
|
|
})
|
|
}
|
|
|
|
c.netmapDirty = false
|
|
c.filterDirty = false
|
|
c.derpMapDirty = false
|
|
c.phase = configuring
|
|
c.Broadcast()
|
|
|
|
c.L.Unlock()
|
|
for _, a := range actions {
|
|
a()
|
|
}
|
|
c.L.Lock()
|
|
}
|
|
}
|
|
|
|
// close closes the configMaps and stops it configuring the engine
|
|
func (c *configMaps) close() {
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
for _, lc := range c.peers {
|
|
lc.resetLostTimer()
|
|
}
|
|
c.closing = true
|
|
c.Broadcast()
|
|
for c.phase != closed {
|
|
c.Wait()
|
|
}
|
|
}
|
|
|
|
// netMapLocked returns the current NetworkMap as determined by the config we
|
|
// have. c.L must be held.
|
|
func (c *configMaps) netMapLocked() *netmap.NetworkMap {
|
|
nm := new(netmap.NetworkMap)
|
|
*nm = c.static
|
|
|
|
nm.Addresses = make([]netip.Prefix, len(c.addresses))
|
|
copy(nm.Addresses, c.addresses)
|
|
|
|
// we don't need to set the DERPMap in the network map because we separately
|
|
// send the DERPMap directly via SetDERPMap
|
|
nm.Peers = c.peerConfigLocked()
|
|
nm.SelfNode.Addresses = nm.Addresses
|
|
nm.SelfNode.AllowedIPs = nm.Addresses
|
|
return nm
|
|
}
|
|
|
|
// hostsLocked returns the current DNS hosts mapping. c.L must be held.
|
|
func (c *configMaps) hostsLocked() map[dnsname.FQDN][]netip.Addr {
|
|
return maps.Clone(c.hosts)
|
|
}
|
|
|
|
// peerConfigLocked returns the set of peer nodes we have. c.L must be held.
|
|
func (c *configMaps) peerConfigLocked() []*tailcfg.Node {
|
|
out := make([]*tailcfg.Node, 0, len(c.peers))
|
|
for _, p := range c.peers {
|
|
// Don't add nodes that we havent received a READY_FOR_HANDSHAKE for
|
|
// yet, if they're a destination. If we received a READY_FOR_HANDSHAKE
|
|
// for a peer before we receive their node, the node will be nil.
|
|
if (!p.readyForHandshake && p.isDestination) || p.node == nil {
|
|
continue
|
|
}
|
|
n := p.node.Clone()
|
|
if c.blockEndpoints {
|
|
n.Endpoints = nil
|
|
}
|
|
out = append(out, n)
|
|
}
|
|
return out
|
|
}
|
|
|
|
func (c *configMaps) setTunnelDestination(id uuid.UUID) {
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
lc, ok := c.peers[id]
|
|
if !ok {
|
|
lc = &peerLifecycle{
|
|
peerID: id,
|
|
}
|
|
c.logger.Debug(context.Background(), "setting peer tunnel destination", slog.F("peer_id", id))
|
|
c.peers[id] = lc
|
|
}
|
|
lc.isDestination = true
|
|
}
|
|
|
|
// setAddresses sets the addresses belonging to this node to the given slice. It
|
|
// triggers configuration of the engine if the addresses have changed.
|
|
// c.L MUST NOT be held.
|
|
func (c *configMaps) setAddresses(ips []netip.Prefix) {
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
if d := prefixesDifferent(c.addresses, ips); !d {
|
|
return
|
|
}
|
|
c.addresses = make([]netip.Prefix, len(ips))
|
|
copy(c.addresses, ips)
|
|
c.netmapDirty = true
|
|
c.filterDirty = true
|
|
c.Broadcast()
|
|
}
|
|
|
|
func (c *configMaps) setHosts(hosts map[dnsname.FQDN][]netip.Addr) {
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
c.hosts = make(map[dnsname.FQDN][]netip.Addr)
|
|
for name, addrs := range hosts {
|
|
c.hosts[name] = slices.Clone(addrs)
|
|
}
|
|
c.netmapDirty = true
|
|
c.Broadcast()
|
|
}
|
|
|
|
// setBlockEndpoints sets whether we should block configuring endpoints we learn
|
|
// from peers. It triggers a configuration of the engine if the value changes.
|
|
// nolint: revive
|
|
func (c *configMaps) setBlockEndpoints(blockEndpoints bool) {
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
if c.blockEndpoints != blockEndpoints {
|
|
c.netmapDirty = true
|
|
}
|
|
c.blockEndpoints = blockEndpoints
|
|
c.Broadcast()
|
|
}
|
|
|
|
// getBlockEndpoints returns the value of the most recent setBlockEndpoints
|
|
// call.
|
|
func (c *configMaps) getBlockEndpoints() bool {
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
return c.blockEndpoints
|
|
}
|
|
|
|
// setDERPMap sets the DERP map, triggering a configuration of the engine if it has changed.
|
|
// c.L MUST NOT be held.
|
|
// Returns if the derpMap is dirty.
|
|
func (c *configMaps) setDERPMap(derpMap *tailcfg.DERPMap) bool {
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
if CompareDERPMaps(c.derpMap, derpMap) {
|
|
return false
|
|
}
|
|
c.derpMap = derpMap
|
|
c.derpMapDirty = true
|
|
c.Broadcast()
|
|
return true
|
|
}
|
|
|
|
// derMapLocked returns the current DERPMap. c.L must be held
|
|
func (c *configMaps) derpMapLocked() *tailcfg.DERPMap {
|
|
return c.derpMap.Clone()
|
|
}
|
|
|
|
// reconfig computes the correct wireguard config and calls the engine.Reconfig
|
|
// with the config we have. It is not intended for this to be called outside of
|
|
// the updateLoop()
|
|
func (c *configMaps) reconfig(nm *netmap.NetworkMap, hosts map[dnsname.FQDN][]netip.Addr) {
|
|
dnsCfg := &dns.Config{}
|
|
if len(hosts) > 0 {
|
|
dnsCfg.Hosts = hosts
|
|
dnsCfg.OnlyIPv6 = true
|
|
dnsCfg.Routes = map[dnsname.FQDN][]*dnstype.Resolver{
|
|
c.matchDomain: nil,
|
|
}
|
|
}
|
|
cfg, err := nmcfg.WGCfg(nm, Logger(c.logger.Named("net.wgconfig")), netmap.AllowSingleHosts, "")
|
|
if err != nil {
|
|
// WGCfg never returns an error at the time this code was written. If it starts, returning
|
|
// errors if/when we upgrade tailscale, we'll need to deal.
|
|
c.logger.Critical(context.Background(), "update wireguard config failed", slog.Error(err))
|
|
return
|
|
}
|
|
|
|
rc := &router.Config{
|
|
LocalAddrs: nm.Addresses,
|
|
Routes: []netip.Prefix{CoderServicePrefix.AsNetip()},
|
|
}
|
|
err = c.engine.Reconfig(cfg, rc, dnsCfg, &tailcfg.Debug{})
|
|
if err != nil {
|
|
if errors.Is(err, wgengine.ErrNoChanges) {
|
|
return
|
|
}
|
|
c.logger.Error(context.Background(), "failed to reconfigure wireguard engine", slog.Error(err))
|
|
}
|
|
}
|
|
|
|
// filterLocked returns the current filter, based on our local addresses. c.L
|
|
// must be held.
|
|
func (c *configMaps) filterLocked() *filter.Filter {
|
|
localIPSet := netipx.IPSetBuilder{}
|
|
for _, addr := range c.addresses {
|
|
localIPSet.AddPrefix(addr)
|
|
}
|
|
localIPs, _ := localIPSet.IPSet()
|
|
logIPSet := netipx.IPSetBuilder{}
|
|
logIPs, _ := logIPSet.IPSet()
|
|
return filter.New(
|
|
c.static.PacketFilter,
|
|
localIPs,
|
|
logIPs,
|
|
nil,
|
|
Logger(c.logger.Named("net.packet-filter")),
|
|
)
|
|
}
|
|
|
|
// updatePeers handles protocol updates about peers from the coordinator. c.L MUST NOT be held.
|
|
func (c *configMaps) updatePeers(updates []*proto.CoordinateResponse_PeerUpdate) {
|
|
status := c.status()
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
|
|
// Update all the lastHandshake values here. That way we don't have to
|
|
// worry about them being up-to-date when handling updates below, and it covers
|
|
// all peers, not just the ones we got updates about.
|
|
for _, lc := range c.peers {
|
|
if lc.node != nil {
|
|
if peerStatus, ok := status.Peer[lc.node.Key]; ok {
|
|
lc.lastHandshake = peerStatus.LastHandshake
|
|
}
|
|
}
|
|
}
|
|
|
|
for _, update := range updates {
|
|
if dirty := c.updatePeerLocked(update, status); dirty {
|
|
c.netmapDirty = true
|
|
}
|
|
}
|
|
if c.netmapDirty {
|
|
c.Broadcast()
|
|
}
|
|
}
|
|
|
|
// status requests a status update from the engine.
|
|
func (c *configMaps) status() *ipnstate.Status {
|
|
sb := &ipnstate.StatusBuilder{WantPeers: true}
|
|
c.engine.UpdateStatus(sb)
|
|
return sb.Status()
|
|
}
|
|
|
|
// updatePeerLocked processes a single update for a single peer. It is intended
|
|
// as internal function since it returns whether or not the config is dirtied by
|
|
// the update (instead of handling it directly like updatePeers). c.L must be held.
|
|
func (c *configMaps) updatePeerLocked(update *proto.CoordinateResponse_PeerUpdate, status *ipnstate.Status) (dirty bool) {
|
|
id, err := uuid.FromBytes(update.Id)
|
|
if err != nil {
|
|
c.logger.Critical(context.Background(), "received update with bad id", slog.F("id", update.Id))
|
|
return false
|
|
}
|
|
logger := c.logger.With(slog.F("peer_id", id))
|
|
lc, peerOk := c.peers[id]
|
|
var node *tailcfg.Node
|
|
if update.Kind == proto.CoordinateResponse_PeerUpdate_NODE {
|
|
// If no preferred DERP is provided, we can't reach the node.
|
|
if update.Node.PreferredDerp == 0 {
|
|
logger.Warn(context.Background(), "no preferred DERP, peer update", slog.F("node_proto", update.Node))
|
|
return false
|
|
}
|
|
node, err = c.protoNodeToTailcfg(update.Node)
|
|
if err != nil {
|
|
logger.Critical(context.Background(), "failed to convert proto node to tailcfg", slog.F("node_proto", update.Node))
|
|
return false
|
|
}
|
|
logger = logger.With(slog.F("key_id", node.Key.ShortString()), slog.F("node", node))
|
|
node.KeepAlive = c.nodeKeepalive(lc, status, node)
|
|
}
|
|
switch {
|
|
case !peerOk && update.Kind == proto.CoordinateResponse_PeerUpdate_NODE:
|
|
// new!
|
|
var lastHandshake time.Time
|
|
if ps, ok := status.Peer[node.Key]; ok {
|
|
lastHandshake = ps.LastHandshake
|
|
}
|
|
lc = &peerLifecycle{
|
|
peerID: id,
|
|
node: node,
|
|
lastHandshake: lastHandshake,
|
|
lost: false,
|
|
}
|
|
c.peers[id] = lc
|
|
logger.Debug(context.Background(), "adding new peer")
|
|
return lc.validForWireguard()
|
|
case peerOk && update.Kind == proto.CoordinateResponse_PeerUpdate_NODE:
|
|
// update
|
|
if lc.node != nil {
|
|
node.Created = lc.node.Created
|
|
}
|
|
dirty = !lc.node.Equal(node)
|
|
lc.node = node
|
|
// validForWireguard checks that the node is non-nil, so should be
|
|
// called after we update the node.
|
|
dirty = dirty && lc.validForWireguard()
|
|
lc.lost = false
|
|
lc.resetLostTimer()
|
|
if lc.isDestination && !lc.readyForHandshake {
|
|
// We received the node of a destination peer before we've received
|
|
// their READY_FOR_HANDSHAKE. Set a timer
|
|
lc.setReadyForHandshakeTimer(c)
|
|
logger.Debug(context.Background(), "setting ready for handshake timeout")
|
|
}
|
|
logger.Debug(context.Background(), "node update to existing peer", slog.F("dirty", dirty))
|
|
return dirty
|
|
case peerOk && update.Kind == proto.CoordinateResponse_PeerUpdate_READY_FOR_HANDSHAKE:
|
|
dirty := !lc.readyForHandshake
|
|
lc.readyForHandshake = true
|
|
if lc.readyForHandshakeTimer != nil {
|
|
lc.readyForHandshakeTimer.Stop()
|
|
}
|
|
if lc.node != nil {
|
|
old := lc.node.KeepAlive
|
|
lc.node.KeepAlive = c.nodeKeepalive(lc, status, lc.node)
|
|
dirty = dirty || (old != lc.node.KeepAlive)
|
|
}
|
|
logger.Debug(context.Background(), "peer ready for handshake")
|
|
// only force a reconfig if the node populated
|
|
return dirty && lc.node != nil
|
|
case !peerOk && update.Kind == proto.CoordinateResponse_PeerUpdate_READY_FOR_HANDSHAKE:
|
|
// When we receive a READY_FOR_HANDSHAKE for a peer we don't know about,
|
|
// we create a peerLifecycle with the peerID and set readyForHandshake
|
|
// to true. Eventually we should receive a NODE update for this peer,
|
|
// and it'll be programmed into wireguard.
|
|
logger.Debug(context.Background(), "got peer ready for handshake for unknown peer")
|
|
lc = &peerLifecycle{
|
|
peerID: id,
|
|
readyForHandshake: true,
|
|
}
|
|
c.peers[id] = lc
|
|
return false
|
|
case !peerOk:
|
|
// disconnected or lost, but we don't have the node. No op
|
|
logger.Debug(context.Background(), "skipping update for peer we don't recognize")
|
|
return false
|
|
case update.Kind == proto.CoordinateResponse_PeerUpdate_DISCONNECTED:
|
|
lc.resetLostTimer()
|
|
delete(c.peers, id)
|
|
logger.Debug(context.Background(), "disconnected peer")
|
|
return true
|
|
case update.Kind == proto.CoordinateResponse_PeerUpdate_LOST:
|
|
lc.lost = true
|
|
lc.setLostTimer(c)
|
|
logger.Debug(context.Background(), "marked peer lost")
|
|
// marking a node lost doesn't change anything right now, so dirty=false
|
|
return false
|
|
default:
|
|
logger.Warn(context.Background(), "unknown peer update", slog.F("kind", update.Kind))
|
|
return false
|
|
}
|
|
}
|
|
|
|
// setAllPeersLost marks all peers as lost. Typically, this is called when we lose connection to
|
|
// the Coordinator. (When we reconnect, we will get NODE updates for all peers that are still connected
|
|
// and mark them as not lost.)
|
|
func (c *configMaps) setAllPeersLost() {
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
for _, lc := range c.peers {
|
|
if lc.lost {
|
|
// skip processing already lost nodes, as this just results in timer churn
|
|
continue
|
|
}
|
|
lc.lost = true
|
|
lc.setLostTimer(c)
|
|
// it's important to drop a log here so that we see it get marked lost if grepping thru
|
|
// the logs for a specific peer
|
|
keyID := "(nil node)"
|
|
if lc.node != nil {
|
|
keyID = lc.node.Key.ShortString()
|
|
}
|
|
c.logger.Debug(context.Background(),
|
|
"setAllPeersLost marked peer lost",
|
|
slog.F("peer_id", lc.peerID),
|
|
slog.F("key_id", keyID),
|
|
)
|
|
}
|
|
}
|
|
|
|
// peerLostTimeout is the callback that peerLifecycle uses when a peer is lost the timeout to
|
|
// receive a handshake fires.
|
|
func (c *configMaps) peerLostTimeout(id uuid.UUID) {
|
|
logger := c.logger.With(slog.F("peer_id", id))
|
|
logger.Debug(context.Background(),
|
|
"peer lost timeout")
|
|
|
|
// First do a status update to see if the peer did a handshake while we were
|
|
// waiting
|
|
status := c.status()
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
|
|
lc, ok := c.peers[id]
|
|
if !ok {
|
|
logger.Debug(context.Background(),
|
|
"timeout triggered for peer that is removed from the map")
|
|
return
|
|
}
|
|
if lc.node != nil {
|
|
if peerStatus, ok := status.Peer[lc.node.Key]; ok {
|
|
lc.lastHandshake = peerStatus.LastHandshake
|
|
}
|
|
logger = logger.With(slog.F("key_id", lc.node.Key.ShortString()))
|
|
}
|
|
if !lc.lost {
|
|
logger.Debug(context.Background(),
|
|
"timeout triggered for peer that is no longer lost")
|
|
return
|
|
}
|
|
since := c.clock.Since(lc.lastHandshake)
|
|
if since >= lostTimeout {
|
|
logger.Info(
|
|
context.Background(), "removing lost peer")
|
|
delete(c.peers, id)
|
|
c.netmapDirty = true
|
|
c.Broadcast()
|
|
return
|
|
}
|
|
logger.Debug(context.Background(),
|
|
"timeout triggered for peer but it had handshake in meantime")
|
|
lc.setLostTimer(c)
|
|
}
|
|
|
|
func (c *configMaps) protoNodeToTailcfg(p *proto.Node) (*tailcfg.Node, error) {
|
|
node, err := ProtoToNode(p)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &tailcfg.Node{
|
|
ID: tailcfg.NodeID(p.GetId()),
|
|
Created: c.clock.Now(),
|
|
Key: node.Key,
|
|
DiscoKey: node.DiscoKey,
|
|
Addresses: node.Addresses,
|
|
AllowedIPs: node.AllowedIPs,
|
|
Endpoints: node.Endpoints,
|
|
DERP: fmt.Sprintf("%s:%d", tailcfg.DerpMagicIP, node.PreferredDERP),
|
|
Hostinfo: (&tailcfg.Hostinfo{}).View(),
|
|
}, nil
|
|
}
|
|
|
|
// nodeAddresses returns the addresses for the peer with the given publicKey, if known.
|
|
func (c *configMaps) nodeAddresses(publicKey key.NodePublic) ([]netip.Prefix, bool) {
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
for _, lc := range c.peers {
|
|
if lc.node != nil && lc.node.Key == publicKey {
|
|
return lc.node.Addresses, true
|
|
}
|
|
}
|
|
return nil, false
|
|
}
|
|
|
|
func (c *configMaps) fillPeerDiagnostics(d *PeerDiagnostics, peerID uuid.UUID) {
|
|
status := c.status()
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
if c.derpMap != nil {
|
|
for j, r := range c.derpMap.Regions {
|
|
d.DERPRegionNames[j] = r.RegionName
|
|
}
|
|
}
|
|
lc, ok := c.peers[peerID]
|
|
if !ok || lc.node == nil {
|
|
return
|
|
}
|
|
|
|
d.ReceivedNode = lc.node
|
|
ps, ok := status.Peer[lc.node.Key]
|
|
if !ok {
|
|
return
|
|
}
|
|
d.LastWireguardHandshake = ps.LastHandshake
|
|
}
|
|
|
|
func (c *configMaps) knownPeerIDs() []uuid.UUID {
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
out := make([]uuid.UUID, 0, len(c.peers))
|
|
for id := range c.peers {
|
|
out = append(out, id)
|
|
}
|
|
return out
|
|
}
|
|
|
|
func (c *configMaps) peerReadyForHandshakeTimeout(peerID uuid.UUID) {
|
|
logger := c.logger.With(slog.F("peer_id", peerID))
|
|
logger.Debug(context.Background(), "peer ready for handshake timeout")
|
|
c.L.Lock()
|
|
defer c.L.Unlock()
|
|
lc, ok := c.peers[peerID]
|
|
if !ok {
|
|
logger.Debug(context.Background(),
|
|
"ready for handshake timeout triggered for peer that is removed from the map")
|
|
return
|
|
}
|
|
|
|
wasReady := lc.readyForHandshake
|
|
lc.readyForHandshake = true
|
|
if !wasReady {
|
|
logger.Info(context.Background(), "setting peer ready for handshake after timeout")
|
|
c.netmapDirty = true
|
|
c.Broadcast()
|
|
}
|
|
}
|
|
|
|
func (*configMaps) nodeKeepalive(lc *peerLifecycle, status *ipnstate.Status, node *tailcfg.Node) bool {
|
|
// If the peer is already active, keepalives should be enabled.
|
|
if peerStatus, statusOk := status.Peer[node.Key]; statusOk && peerStatus.Active {
|
|
return true
|
|
}
|
|
// If the peer is a destination, we should only enable keepalives if we've
|
|
// received the READY_FOR_HANDSHAKE.
|
|
if lc != nil && lc.isDestination && lc.readyForHandshake {
|
|
return true
|
|
}
|
|
|
|
// If none of the above are true, keepalives should not be enabled.
|
|
return false
|
|
}
|
|
|
|
type peerLifecycle struct {
|
|
peerID uuid.UUID
|
|
// isDestination specifies if the peer is a destination, meaning we
|
|
// initiated a tunnel to the peer. When the peer is a destination, we do not
|
|
// respond to node updates with `READY_FOR_HANDSHAKE`s, and we wait to
|
|
// program the peer into wireguard until we receive a READY_FOR_HANDSHAKE
|
|
// from the peer or the timeout is reached.
|
|
isDestination bool
|
|
// node is the tailcfg.Node for the peer. It may be nil until we receive a
|
|
// NODE update for it.
|
|
node *tailcfg.Node
|
|
lost bool
|
|
lastHandshake time.Time
|
|
lostTimer *quartz.Timer
|
|
readyForHandshake bool
|
|
readyForHandshakeTimer *quartz.Timer
|
|
}
|
|
|
|
func (l *peerLifecycle) resetLostTimer() {
|
|
if l.lostTimer != nil {
|
|
l.lostTimer.Stop()
|
|
l.lostTimer = nil
|
|
}
|
|
}
|
|
|
|
func (l *peerLifecycle) setLostTimer(c *configMaps) {
|
|
if l.lostTimer != nil {
|
|
l.lostTimer.Stop()
|
|
}
|
|
ttl := lostTimeout - c.clock.Since(l.lastHandshake)
|
|
if ttl <= 0 {
|
|
ttl = time.Nanosecond
|
|
}
|
|
l.lostTimer = c.clock.AfterFunc(ttl, func() {
|
|
c.peerLostTimeout(l.peerID)
|
|
})
|
|
}
|
|
|
|
const readyForHandshakeTimeout = 5 * time.Second
|
|
|
|
func (l *peerLifecycle) setReadyForHandshakeTimer(c *configMaps) {
|
|
if l.readyForHandshakeTimer != nil {
|
|
l.readyForHandshakeTimer.Stop()
|
|
}
|
|
l.readyForHandshakeTimer = c.clock.AfterFunc(readyForHandshakeTimeout, func() {
|
|
c.logger.Debug(context.Background(), "ready for handshake timeout", slog.F("peer_id", l.peerID))
|
|
c.peerReadyForHandshakeTimeout(l.peerID)
|
|
})
|
|
}
|
|
|
|
// validForWireguard returns true if the peer is ready to be programmed into
|
|
// wireguard.
|
|
func (l *peerLifecycle) validForWireguard() bool {
|
|
valid := l.node != nil
|
|
if l.isDestination {
|
|
return valid && l.readyForHandshake
|
|
}
|
|
return valid
|
|
}
|
|
|
|
// prefixesDifferent returns true if the two slices contain different prefixes
|
|
// where order doesn't matter.
|
|
func prefixesDifferent(a, b []netip.Prefix) bool {
|
|
if len(a) != len(b) {
|
|
return true
|
|
}
|
|
as := make(map[string]bool)
|
|
for _, p := range a {
|
|
as[p.String()] = true
|
|
}
|
|
for _, p := range b {
|
|
if !as[p.String()] {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// derpMapStringer converts a DERPMap into a readable string for logging, since
|
|
// it includes pointers that we want to know the contents of, not actual pointer
|
|
// address.
|
|
type derpMapStringer tailcfg.DERPMap
|
|
|
|
func (d *derpMapStringer) String() string {
|
|
out, err := json.Marshal((*tailcfg.DERPMap)(d))
|
|
if err != nil {
|
|
return fmt.Sprintf("!!!error marshaling DERPMap: %s", err.Error())
|
|
}
|
|
return string(out)
|
|
}
|