Track connections by local index id instead of vpn ip (#807)

This commit is contained in:
Nate Brown 2023-02-13 14:41:05 -06:00 committed by GitHub
parent 5bd8712946
commit a06977bbd5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 109 additions and 100 deletions

View file

@ -7,7 +7,6 @@ import (
"github.com/sirupsen/logrus"
"github.com/slackhq/nebula/header"
"github.com/slackhq/nebula/iputil"
)
// TODO: incount and outcount are intended as a shortcut to locking the mutexes for every single packet
@ -15,16 +14,16 @@ import (
type connectionManager struct {
hostMap *HostMap
in map[iputil.VpnIp]struct{}
in map[uint32]struct{}
inLock *sync.RWMutex
out map[iputil.VpnIp]struct{}
out map[uint32]struct{}
outLock *sync.RWMutex
TrafficTimer *LockingTimerWheel[iputil.VpnIp]
TrafficTimer *LockingTimerWheel[uint32]
intf *Interface
pendingDeletion map[iputil.VpnIp]int
pendingDeletion map[uint32]int
pendingDeletionLock *sync.RWMutex
pendingDeletionTimer *LockingTimerWheel[iputil.VpnIp]
pendingDeletionTimer *LockingTimerWheel[uint32]
checkInterval int
pendingDeletionInterval int
@ -36,15 +35,15 @@ type connectionManager struct {
func newConnectionManager(ctx context.Context, l *logrus.Logger, intf *Interface, checkInterval, pendingDeletionInterval int) *connectionManager {
nc := &connectionManager{
hostMap: intf.hostMap,
in: make(map[iputil.VpnIp]struct{}),
in: make(map[uint32]struct{}),
inLock: &sync.RWMutex{},
out: make(map[iputil.VpnIp]struct{}),
out: make(map[uint32]struct{}),
outLock: &sync.RWMutex{},
TrafficTimer: NewLockingTimerWheel[iputil.VpnIp](time.Millisecond*500, time.Second*60),
TrafficTimer: NewLockingTimerWheel[uint32](time.Millisecond*500, time.Second*60),
intf: intf,
pendingDeletion: make(map[iputil.VpnIp]int),
pendingDeletion: make(map[uint32]int),
pendingDeletionLock: &sync.RWMutex{},
pendingDeletionTimer: NewLockingTimerWheel[iputil.VpnIp](time.Millisecond*500, time.Second*60),
pendingDeletionTimer: NewLockingTimerWheel[uint32](time.Millisecond*500, time.Second*60),
checkInterval: checkInterval,
pendingDeletionInterval: pendingDeletionInterval,
l: l,
@ -53,41 +52,41 @@ func newConnectionManager(ctx context.Context, l *logrus.Logger, intf *Interface
return nc
}
func (n *connectionManager) In(ip iputil.VpnIp) {
func (n *connectionManager) In(localIndex uint32) {
n.inLock.RLock()
// If this already exists, return
if _, ok := n.in[ip]; ok {
if _, ok := n.in[localIndex]; ok {
n.inLock.RUnlock()
return
}
n.inLock.RUnlock()
n.inLock.Lock()
n.in[ip] = struct{}{}
n.in[localIndex] = struct{}{}
n.inLock.Unlock()
}
func (n *connectionManager) Out(ip iputil.VpnIp) {
func (n *connectionManager) Out(localIndex uint32) {
n.outLock.RLock()
// If this already exists, return
if _, ok := n.out[ip]; ok {
if _, ok := n.out[localIndex]; ok {
n.outLock.RUnlock()
return
}
n.outLock.RUnlock()
n.outLock.Lock()
// double check since we dropped the lock temporarily
if _, ok := n.out[ip]; ok {
if _, ok := n.out[localIndex]; ok {
n.outLock.Unlock()
return
}
n.out[ip] = struct{}{}
n.AddTrafficWatch(ip, n.checkInterval)
n.out[localIndex] = struct{}{}
n.AddTrafficWatch(localIndex, n.checkInterval)
n.outLock.Unlock()
}
func (n *connectionManager) CheckIn(vpnIp iputil.VpnIp) bool {
func (n *connectionManager) CheckIn(localIndex uint32) bool {
n.inLock.RLock()
if _, ok := n.in[vpnIp]; ok {
if _, ok := n.in[localIndex]; ok {
n.inLock.RUnlock()
return true
}
@ -95,35 +94,35 @@ func (n *connectionManager) CheckIn(vpnIp iputil.VpnIp) bool {
return false
}
func (n *connectionManager) ClearIP(ip iputil.VpnIp) {
func (n *connectionManager) ClearLocalIndex(localIndex uint32) {
n.inLock.Lock()
n.outLock.Lock()
delete(n.in, ip)
delete(n.out, ip)
delete(n.in, localIndex)
delete(n.out, localIndex)
n.inLock.Unlock()
n.outLock.Unlock()
}
func (n *connectionManager) ClearPendingDeletion(ip iputil.VpnIp) {
func (n *connectionManager) ClearPendingDeletion(localIndex uint32) {
n.pendingDeletionLock.Lock()
delete(n.pendingDeletion, ip)
delete(n.pendingDeletion, localIndex)
n.pendingDeletionLock.Unlock()
}
func (n *connectionManager) AddPendingDeletion(ip iputil.VpnIp) {
func (n *connectionManager) AddPendingDeletion(localIndex uint32) {
n.pendingDeletionLock.Lock()
if _, ok := n.pendingDeletion[ip]; ok {
n.pendingDeletion[ip] += 1
if _, ok := n.pendingDeletion[localIndex]; ok {
n.pendingDeletion[localIndex] += 1
} else {
n.pendingDeletion[ip] = 0
n.pendingDeletion[localIndex] = 0
}
n.pendingDeletionTimer.Add(ip, time.Second*time.Duration(n.pendingDeletionInterval))
n.pendingDeletionTimer.Add(localIndex, time.Second*time.Duration(n.pendingDeletionInterval))
n.pendingDeletionLock.Unlock()
}
func (n *connectionManager) checkPendingDeletion(ip iputil.VpnIp) bool {
func (n *connectionManager) checkPendingDeletion(localIndex uint32) bool {
n.pendingDeletionLock.RLock()
if _, ok := n.pendingDeletion[ip]; ok {
if _, ok := n.pendingDeletion[localIndex]; ok {
n.pendingDeletionLock.RUnlock()
return true
@ -132,8 +131,8 @@ func (n *connectionManager) checkPendingDeletion(ip iputil.VpnIp) bool {
return false
}
func (n *connectionManager) AddTrafficWatch(vpnIp iputil.VpnIp, seconds int) {
n.TrafficTimer.Add(vpnIp, time.Second*time.Duration(seconds))
func (n *connectionManager) AddTrafficWatch(localIndex uint32, seconds int) {
n.TrafficTimer.Add(localIndex, time.Second*time.Duration(seconds))
}
func (n *connectionManager) Start(ctx context.Context) {
@ -162,23 +161,23 @@ func (n *connectionManager) Run(ctx context.Context) {
func (n *connectionManager) HandleMonitorTick(now time.Time, p, nb, out []byte) {
n.TrafficTimer.Advance(now)
for {
vpnIp, has := n.TrafficTimer.Purge()
localIndex, has := n.TrafficTimer.Purge()
if !has {
break
}
// Check for traffic coming back in from this host.
traf := n.CheckIn(vpnIp)
traf := n.CheckIn(localIndex)
hostinfo, err := n.hostMap.QueryVpnIp(vpnIp)
hostinfo, err := n.hostMap.QueryIndex(localIndex)
if err != nil {
n.l.Debugf("Not found in hostmap: %s", vpnIp)
n.ClearIP(vpnIp)
n.ClearPendingDeletion(vpnIp)
n.l.WithField("localIndex", localIndex).Debugf("Not found in hostmap")
n.ClearLocalIndex(localIndex)
n.ClearPendingDeletion(localIndex)
continue
}
if n.handleInvalidCertificate(now, vpnIp, hostinfo) {
if n.handleInvalidCertificate(now, hostinfo) {
continue
}
@ -186,12 +185,12 @@ func (n *connectionManager) HandleMonitorTick(now time.Time, p, nb, out []byte)
// expired, just ignore.
if traf {
if n.l.Level >= logrus.DebugLevel {
n.l.WithField("vpnIp", vpnIp).
hostinfo.logger(n.l).
WithField("tunnelCheck", m{"state": "alive", "method": "passive"}).
Debug("Tunnel status")
}
n.ClearIP(vpnIp)
n.ClearPendingDeletion(vpnIp)
n.ClearLocalIndex(localIndex)
n.ClearPendingDeletion(localIndex)
continue
}
@ -201,12 +200,12 @@ func (n *connectionManager) HandleMonitorTick(now time.Time, p, nb, out []byte)
if hostinfo != nil && hostinfo.ConnectionState != nil {
// Send a test packet to trigger an authenticated tunnel test, this should suss out any lingering tunnel issues
n.intf.SendMessageToVpnIp(header.Test, header.TestRequest, vpnIp, p, nb, out)
n.intf.sendMessageToVpnIp(header.Test, header.TestRequest, hostinfo, p, nb, out)
} else {
hostinfo.logger(n.l).Debugf("Hostinfo sadness: %s", vpnIp)
hostinfo.logger(n.l).Debugf("Hostinfo sadness")
}
n.AddPendingDeletion(vpnIp)
n.AddPendingDeletion(localIndex)
}
}
@ -214,63 +213,58 @@ func (n *connectionManager) HandleMonitorTick(now time.Time, p, nb, out []byte)
func (n *connectionManager) HandleDeletionTick(now time.Time) {
n.pendingDeletionTimer.Advance(now)
for {
vpnIp, has := n.pendingDeletionTimer.Purge()
localIndex, has := n.pendingDeletionTimer.Purge()
if !has {
break
}
hostinfo, err := n.hostMap.QueryVpnIp(vpnIp)
hostinfo, err := n.hostMap.QueryIndex(localIndex)
if err != nil {
n.l.Debugf("Not found in hostmap: %s", vpnIp)
n.ClearIP(vpnIp)
n.ClearPendingDeletion(vpnIp)
n.l.WithField("localIndex", localIndex).Debugf("Not found in hostmap")
n.ClearLocalIndex(localIndex)
n.ClearPendingDeletion(localIndex)
continue
}
if n.handleInvalidCertificate(now, vpnIp, hostinfo) {
if n.handleInvalidCertificate(now, hostinfo) {
continue
}
// If we saw an incoming packets from this ip and peer's certificate is not
// expired, just ignore.
traf := n.CheckIn(vpnIp)
traf := n.CheckIn(localIndex)
if traf {
n.l.WithField("vpnIp", vpnIp).
hostinfo.logger(n.l).
WithField("tunnelCheck", m{"state": "alive", "method": "active"}).
Debug("Tunnel status")
n.ClearIP(vpnIp)
n.ClearPendingDeletion(vpnIp)
n.ClearLocalIndex(localIndex)
n.ClearPendingDeletion(localIndex)
continue
}
// If it comes around on deletion wheel and hasn't resolved itself, delete
if n.checkPendingDeletion(vpnIp) {
if n.checkPendingDeletion(localIndex) {
cn := ""
if hostinfo.ConnectionState != nil && hostinfo.ConnectionState.peerCert != nil {
cn = hostinfo.ConnectionState.peerCert.Details.Name
}
hostinfo.logger(n.l).
WithField("tunnelCheck", m{"state": "dead", "method": "active"}).
WithField("certName", cn).
Info("Tunnel status")
n.ClearIP(vpnIp)
n.ClearPendingDeletion(vpnIp)
// TODO: This is only here to let tests work. Should do proper mocking
if n.intf.lightHouse != nil {
n.intf.lightHouse.DeleteVpnIp(vpnIp)
}
n.hostMap.DeleteHostInfo(hostinfo)
} else {
n.ClearIP(vpnIp)
n.ClearPendingDeletion(vpnIp)
}
n.ClearLocalIndex(localIndex)
n.ClearPendingDeletion(localIndex)
}
}
// handleInvalidCertificates will destroy a tunnel if pki.disconnect_invalid is true and the certificate is no longer valid
func (n *connectionManager) handleInvalidCertificate(now time.Time, vpnIp iputil.VpnIp, hostinfo *HostInfo) bool {
func (n *connectionManager) handleInvalidCertificate(now time.Time, hostinfo *HostInfo) bool {
if !n.intf.disconnectInvalid {
return false
}
@ -286,8 +280,7 @@ func (n *connectionManager) handleInvalidCertificate(now time.Time, vpnIp iputil
}
fingerprint, _ := remoteCert.Sha256Sum()
n.l.WithField("vpnIp", vpnIp).WithError(err).
WithField("certName", remoteCert.Details.Name).
hostinfo.logger(n.l).WithError(err).
WithField("fingerprint", fingerprint).
Info("Remote certificate is no longer valid, tearing down the tunnel")
@ -295,7 +288,7 @@ func (n *connectionManager) handleInvalidCertificate(now time.Time, vpnIp iputil
n.intf.sendCloseTunnel(hostinfo)
n.intf.closeTunnel(hostinfo)
n.ClearIP(vpnIp)
n.ClearPendingDeletion(vpnIp)
n.ClearLocalIndex(hostinfo.localIndexId)
n.ClearPendingDeletion(hostinfo.localIndexId)
return true
}

View file

@ -71,16 +71,22 @@ func Test_NewConnectionManagerTest(t *testing.T) {
out := make([]byte, mtu)
nc.HandleMonitorTick(now, p, nb, out)
// Add an ip we have established a connection w/ to hostmap
hostinfo, _ := nc.hostMap.AddVpnIp(vpnIp, nil)
hostinfo := &HostInfo{
vpnIp: vpnIp,
localIndexId: 1099,
remoteIndexId: 9901,
}
hostinfo.ConnectionState = &ConnectionState{
certState: cs,
H: &noise.HandshakeState{},
}
nc.hostMap.addHostInfo(hostinfo, ifce)
// We saw traffic out to vpnIp
nc.Out(vpnIp)
assert.NotContains(t, nc.pendingDeletion, vpnIp)
assert.Contains(t, nc.hostMap.Hosts, vpnIp)
nc.Out(hostinfo.localIndexId)
assert.NotContains(t, nc.pendingDeletion, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnIp)
assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
// Move ahead 5s. Nothing should happen
next_tick := now.Add(5 * time.Second)
nc.HandleMonitorTick(next_tick, p, nb, out)
@ -90,16 +96,17 @@ func Test_NewConnectionManagerTest(t *testing.T) {
nc.HandleMonitorTick(next_tick, p, nb, out)
nc.HandleDeletionTick(next_tick)
// This host should now be up for deletion
assert.Contains(t, nc.pendingDeletion, vpnIp)
assert.Contains(t, nc.hostMap.Hosts, vpnIp)
assert.Contains(t, nc.pendingDeletion, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnIp)
assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
// Move ahead some more
next_tick = now.Add(45 * time.Second)
nc.HandleMonitorTick(next_tick, p, nb, out)
nc.HandleDeletionTick(next_tick)
// The host should be evicted
assert.NotContains(t, nc.pendingDeletion, vpnIp)
assert.NotContains(t, nc.hostMap.Hosts, vpnIp)
assert.NotContains(t, nc.pendingDeletion, hostinfo.localIndexId)
assert.NotContains(t, nc.hostMap.Hosts, hostinfo.vpnIp)
assert.NotContains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
}
func Test_NewConnectionManagerTest2(t *testing.T) {
@ -140,14 +147,19 @@ func Test_NewConnectionManagerTest2(t *testing.T) {
out := make([]byte, mtu)
nc.HandleMonitorTick(now, p, nb, out)
// Add an ip we have established a connection w/ to hostmap
hostinfo, _ := nc.hostMap.AddVpnIp(vpnIp, nil)
hostinfo := &HostInfo{
vpnIp: vpnIp,
localIndexId: 1099,
remoteIndexId: 9901,
}
hostinfo.ConnectionState = &ConnectionState{
certState: cs,
H: &noise.HandshakeState{},
}
nc.hostMap.addHostInfo(hostinfo, ifce)
// We saw traffic out to vpnIp
nc.Out(vpnIp)
nc.Out(hostinfo.localIndexId)
assert.NotContains(t, nc.pendingDeletion, vpnIp)
assert.Contains(t, nc.hostMap.Hosts, vpnIp)
// Move ahead 5s. Nothing should happen
@ -159,18 +171,19 @@ func Test_NewConnectionManagerTest2(t *testing.T) {
nc.HandleMonitorTick(next_tick, p, nb, out)
nc.HandleDeletionTick(next_tick)
// This host should now be up for deletion
assert.Contains(t, nc.pendingDeletion, vpnIp)
assert.Contains(t, nc.pendingDeletion, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Hosts, vpnIp)
assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
// We heard back this time
nc.In(vpnIp)
nc.In(hostinfo.localIndexId)
// Move ahead some more
next_tick = now.Add(45 * time.Second)
nc.HandleMonitorTick(next_tick, p, nb, out)
nc.HandleDeletionTick(next_tick)
// The host should be evicted
assert.NotContains(t, nc.pendingDeletion, vpnIp)
assert.Contains(t, nc.hostMap.Hosts, vpnIp)
// The host should not be evicted
assert.NotContains(t, nc.pendingDeletion, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnIp)
assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
}
// Check if we can disconnect the peer.
@ -257,13 +270,13 @@ func Test_NewConnectionManagerTest_DisconnectInvalid(t *testing.T) {
// Check if to disconnect with invalid certificate.
// Should be alive.
nextTick := now.Add(45 * time.Second)
destroyed := nc.handleInvalidCertificate(nextTick, vpnIp, hostinfo)
destroyed := nc.handleInvalidCertificate(nextTick, hostinfo)
assert.False(t, destroyed)
// Move ahead 61s.
// Check if to disconnect with invalid certificate.
// Should be disconnected.
nextTick = now.Add(61 * time.Second)
destroyed = nc.handleInvalidCertificate(nextTick, vpnIp, hostinfo)
destroyed = nc.handleInvalidCertificate(nextTick, hostinfo)
assert.True(t, destroyed)
}

View file

@ -764,7 +764,10 @@ func (i *HostInfo) logger(l *logrus.Logger) *logrus.Entry {
return logrus.NewEntry(l)
}
li := l.WithField("vpnIp", i.vpnIp)
li := l.WithField("vpnIp", i.vpnIp).
WithField("localIndex", i.localIndexId).
WithField("remoteIndex", i.remoteIndexId)
if connState := i.ConnectionState; connState != nil {
if peerCert := connState.peerCert; peerCert != nil {
li = li.WithField("certName", peerCert.Details.Name)

View file

@ -224,7 +224,7 @@ func (f *Interface) SendVia(viaIfc interface{},
c := via.ConnectionState.messageCounter.Add(1)
out = header.Encode(out, header.Version, header.Message, header.MessageRelay, relay.RemoteIndex, c)
f.connectionManager.Out(via.vpnIp)
f.connectionManager.Out(via.localIndexId)
// Authenticate the header and payload, but do not encrypt for this message type.
// The payload consists of the inner, unencrypted Nebula header, as well as the end-to-end encrypted payload.
@ -284,7 +284,7 @@ func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType
//l.WithField("trace", string(debug.Stack())).Error("out Header ", &Header{Version, t, st, 0, hostinfo.remoteIndexId, c}, p)
out = header.Encode(out, header.Version, t, st, hostinfo.remoteIndexId, c)
f.connectionManager.Out(hostinfo.vpnIp)
f.connectionManager.Out(hostinfo.localIndexId)
// Query our LH if we haven't since the last time we've been rebound, this will cause the remote to punch against
// all our IPs and enable a faster roaming.

View file

@ -84,7 +84,7 @@ func (f *Interface) readOutsidePackets(addr *udp.Addr, via interface{}, out []by
signedPayload = signedPayload[header.Len:]
// Pull the Roaming parts up here, and return in all call paths.
f.handleHostRoaming(hostinfo, addr)
f.connectionManager.In(hostinfo.vpnIp)
f.connectionManager.In(hostinfo.localIndexId)
relay, ok := hostinfo.relayState.QueryRelayForByIdx(h.RemoteIndex)
if !ok {
@ -237,14 +237,14 @@ func (f *Interface) readOutsidePackets(addr *udp.Addr, via interface{}, out []by
f.handleHostRoaming(hostinfo, addr)
f.connectionManager.In(hostinfo.vpnIp)
f.connectionManager.In(hostinfo.localIndexId)
}
// closeTunnel closes a tunnel locally, it does not send a closeTunnel packet to the remote
func (f *Interface) closeTunnel(hostInfo *HostInfo) {
//TODO: this would be better as a single function in ConnectionManager that handled locks appropriately
f.connectionManager.ClearIP(hostInfo.vpnIp)
f.connectionManager.ClearPendingDeletion(hostInfo.vpnIp)
f.connectionManager.ClearLocalIndex(hostInfo.localIndexId)
f.connectionManager.ClearPendingDeletion(hostInfo.localIndexId)
f.lightHouse.DeleteVpnIp(hostInfo.vpnIp)
f.hostMap.DeleteHostInfo(hostInfo)
@ -405,7 +405,7 @@ func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out
return
}
f.connectionManager.In(hostinfo.vpnIp)
f.connectionManager.In(hostinfo.localIndexId)
_, err = f.readers[q].Write(out)
if err != nil {
f.l.WithError(err).Error("Failed to write to tun")