From 6e0ae4f9a395e451d4754f35ab2c551e6eb8dc50 Mon Sep 17 00:00:00 2001 From: Wade Simmons Date: Mon, 13 Mar 2023 15:08:40 -0400 Subject: [PATCH] firewall: add option to send REJECT replies (#738) * firewall: add option to send REJECT replies This change allows you to configure the firewall to send REJECT packets when a packet is denied. firewall: # Action to take when a packet is not allowed by the firewall rules. # Can be one of: # `drop` (default): silently drop the packet. # `reject`: send a reject reply. # - For TCP, this will be a RST "Connection Reset" packet. # - For other protocols, this will be an ICMP port unreachable packet. outbound_action: drop inbound_action: drop These packets are only sent to established tunnels, and only on the overlay network (currently IPv4 only). $ ping -c1 192.168.100.3 PING 192.168.100.3 (192.168.100.3) 56(84) bytes of data. From 192.168.100.3 icmp_seq=2 Destination Port Unreachable --- 192.168.100.3 ping statistics --- 2 packets transmitted, 0 received, +1 errors, 100% packet loss, time 31ms $ nc -nzv 192.168.100.3 22 (UNKNOWN) [192.168.100.3] 22 (?) : Connection refused This change also modifies the smoke test to capture tcpdump pcaps from both the inside and outside to inspect what is going on over the wire. It also now does TCP and UDP packet tests using the Nmap version of ncat. 
* calculate seq and ack the same way as the kernel The logic is a bit confusing, so we copy it straight from how the kernel does iptables `--reject-with tcp-reset`: - https://github.com/torvalds/linux/blob/v5.19/net/ipv4/netfilter/nf_reject_ipv4.c#L193-L221 * cleanup --- .github/workflows/smoke/Dockerfile | 4 +- .github/workflows/smoke/genconfig.sh | 2 + .github/workflows/smoke/smoke.sh | 43 ++++++ examples/config.yml | 9 ++ firewall.go | 25 ++++ inside.go | 39 ++++- iputil/packet.go | 211 +++++++++++++++++++++++++++ outside.go | 1 + overlay/tun_disabled.go | 51 +------ 9 files changed, 332 insertions(+), 53 deletions(-) create mode 100644 iputil/packet.go diff --git a/.github/workflows/smoke/Dockerfile b/.github/workflows/smoke/Dockerfile index 18460b3..f8a89ef 100644 --- a/.github/workflows/smoke/Dockerfile +++ b/.github/workflows/smoke/Dockerfile @@ -1,4 +1,6 @@ -FROM debian:buster +FROM ubuntu:jammy + +RUN apt-get update && apt-get install -y iputils-ping ncat tcpdump ADD ./build /nebula diff --git a/.github/workflows/smoke/genconfig.sh b/.github/workflows/smoke/genconfig.sh index 005734c..373ea5f 100755 --- a/.github/workflows/smoke/genconfig.sh +++ b/.github/workflows/smoke/genconfig.sh @@ -50,6 +50,8 @@ tun: dev: ${TUN_DEV:-nebula1} firewall: + inbound_action: reject + outbound_action: reject outbound: ${OUTBOUND:-$FIREWALL_ALL} inbound: ${INBOUND:-$FIREWALL_ALL} diff --git a/.github/workflows/smoke/smoke.sh b/.github/workflows/smoke/smoke.sh index 213add3..836e61a 100755 --- a/.github/workflows/smoke/smoke.sh +++ b/.github/workflows/smoke/smoke.sh @@ -34,6 +34,21 @@ sleep 1 sudo docker run --name host4 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config host4.yml 2>&1 | tee logs/host4 | sed -u 's/^/ [host4] /' & sleep 1 +# grab tcpdump pcaps for debugging +sudo docker exec lighthouse1 tcpdump -i nebula1 -q -w - -U 2>logs/lighthouse1.inside.log >logs/lighthouse1.inside.pcap & +sudo docker exec lighthouse1 tcpdump -i eth0 -q -w - -U 
2>logs/lighthouse1.outside.log >logs/lighthouse1.outside.pcap & +sudo docker exec host2 tcpdump -i nebula1 -q -w - -U 2>logs/host2.inside.log >logs/host2.inside.pcap & +sudo docker exec host2 tcpdump -i eth0 -q -w - -U 2>logs/host2.outside.log >logs/host2.outside.pcap & +sudo docker exec host3 tcpdump -i nebula1 -q -w - -U 2>logs/host3.inside.log >logs/host3.inside.pcap & +sudo docker exec host3 tcpdump -i eth0 -q -w - -U 2>logs/host3.outside.log >logs/host3.outside.pcap & +sudo docker exec host4 tcpdump -i nebula1 -q -w - -U 2>logs/host4.inside.log >logs/host4.inside.pcap & +sudo docker exec host4 tcpdump -i eth0 -q -w - -U 2>logs/host4.outside.log >logs/host4.outside.pcap & + +sudo docker exec host2 ncat -nklv 0.0.0.0 2000 & +sudo docker exec host3 ncat -nklv 0.0.0.0 2000 & +sudo docker exec host2 ncat -e '/usr/bin/echo host2' -nkluv 0.0.0.0 3000 & +sudo docker exec host3 ncat -e '/usr/bin/echo host3' -nkluv 0.0.0.0 3000 & + set +x echo echo " *** Testing ping from lighthouse1" @@ -51,6 +66,15 @@ sudo docker exec host2 ping -c1 192.168.100.1 # Should fail because not allowed by host3 inbound firewall ! sudo docker exec host2 ping -c1 192.168.100.3 -w5 || exit 1 +set +x +echo +echo " *** Testing ncat from host2" +echo +set -x +# Should fail because not allowed by host3 inbound firewall +! sudo docker exec host2 ncat -nzv -w5 192.168.100.3 2000 || exit 1 +! sudo docker exec host2 ncat -nzuv -w5 192.168.100.3 3000 | grep -q host3 || exit 1 + set +x echo echo " *** Testing ping from host3" @@ -59,6 +83,14 @@ set -x sudo docker exec host3 ping -c1 192.168.100.1 sudo docker exec host3 ping -c1 192.168.100.2 +set +x +echo +echo " *** Testing ncat from host3" +echo +set -x +sudo docker exec host3 ncat -nzv -w5 192.168.100.2 2000 +sudo docker exec host3 ncat -nzuv -w5 192.168.100.2 3000 | grep -q host2 + set +x echo echo " *** Testing ping from host4" @@ -69,6 +101,17 @@ sudo docker exec host4 ping -c1 192.168.100.1 ! 
sudo docker exec host4 ping -c1 192.168.100.2 -w5 || exit 1 ! sudo docker exec host4 ping -c1 192.168.100.3 -w5 || exit 1 +set +x +echo +echo " *** Testing ncat from host4" +echo +set -x +# Should fail because not allowed by host4 outbound firewall +! sudo docker exec host4 ncat -nzv -w5 192.168.100.2 2000 || exit 1 +! sudo docker exec host4 ncat -nzv -w5 192.168.100.3 2000 || exit 1 +! sudo docker exec host4 ncat -nzuv -w5 192.168.100.2 3000 | grep -q host2 || exit 1 +! sudo docker exec host4 ncat -nzuv -w5 192.168.100.3 3000 | grep -q host3 || exit 1 + set +x echo echo " *** Testing conntrack" diff --git a/examples/config.yml b/examples/config.yml index f214bf7..9fe95ce 100644 --- a/examples/config.yml +++ b/examples/config.yml @@ -259,6 +259,15 @@ logging: # Nebula security group configuration firewall: + # Action to take when a packet is not allowed by the firewall rules. + # Can be one of: + # `drop` (default): silently drop the packet. + # `reject`: send a reject reply. + # - For TCP, this will be a RST "Connection Reset" packet. + # - For other protocols, this will be an ICMP port unreachable packet. 
+ outbound_action: drop + inbound_action: drop + conntrack: tcp_timeout: 12m udp_timeout: 3m diff --git a/firewall.go b/firewall.go index 9fd75fc..061d9e6 100644 --- a/firewall.go +++ b/firewall.go @@ -47,6 +47,9 @@ type Firewall struct { InRules *FirewallTable OutRules *FirewallTable + InSendReject bool + OutSendReject bool + //TODO: we should have many more options for TCP, an option for ICMP, and mimic the kernel a bit better // https://www.kernel.org/doc/Documentation/networking/nf_conntrack-sysctl.txt TCPTimeout time.Duration //linux: 5 days max @@ -179,6 +182,28 @@ func NewFirewallFromConfig(l *logrus.Logger, nc *cert.NebulaCertificate, c *conf //TODO: max_connections ) + inboundAction := c.GetString("firewall.inbound_action", "drop") + switch inboundAction { + case "reject": + fw.InSendReject = true + case "drop": + fw.InSendReject = false + default: + l.WithField("action", inboundAction).Warn("invalid firewall.inbound_action, defaulting to `drop`") + fw.InSendReject = false + } + + outboundAction := c.GetString("firewall.outbound_action", "drop") + switch outboundAction { + case "reject": + fw.OutSendReject = true + case "drop": + fw.OutSendReject = false + default: + l.WithField("action", outboundAction).Warn("invalid firewall.outbound_action, defaulting to `drop`") + fw.OutSendReject = false + } + err := AddFirewallRulesFromConfig(l, false, c, fw) if err != nil { return nil, err diff --git a/inside.go b/inside.go index 38d9332..0734883 100644 --- a/inside.go +++ b/inside.go @@ -46,6 +46,7 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet hostinfo := f.getOrHandshake(fwPacket.RemoteIP) if hostinfo == nil { + f.rejectInside(packet, out, q) if f.l.Level >= logrus.DebugLevel { f.l.WithField("vpnIp", fwPacket.RemoteIP). WithField("fwPacket", fwPacket). 
@@ -71,14 +72,42 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet if dropReason == nil { f.sendNoMetrics(header.Message, 0, ci, hostinfo, nil, packet, nb, out, q) - } else if f.l.Level >= logrus.DebugLevel { - hostinfo.logger(f.l). - WithField("fwPacket", fwPacket). - WithField("reason", dropReason). - Debugln("dropping outbound packet") + } else { + f.rejectInside(packet, out, q) + if f.l.Level >= logrus.DebugLevel { + hostinfo.logger(f.l). + WithField("fwPacket", fwPacket). + WithField("reason", dropReason). + Debugln("dropping outbound packet") + } } } +func (f *Interface) rejectInside(packet []byte, out []byte, q int) { + if !f.firewall.InSendReject { + return + } + + out = iputil.CreateRejectPacket(packet, out) + _, err := f.readers[q].Write(out) + if err != nil { + f.l.WithError(err).Error("Failed to write to tun") + } +} + +func (f *Interface) rejectOutside(packet []byte, ci *ConnectionState, hostinfo *HostInfo, nb, out []byte, q int) { + if !f.firewall.OutSendReject { + return + } + + // Use some out buffer space to build the packet before encryption + // Need 40 bytes for the reject packet (20 byte ipv4 header, 20 byte tcp rst packet) + // Leave 100 bytes for the encrypted packet (60 byte Nebula header, 40 byte reject packet) + out = out[:140] + outPacket := iputil.CreateRejectPacket(packet, out[100:]) + f.sendNoMetrics(header.Message, 0, ci, hostinfo, nil, outPacket, nb, out, q) +} + func (f *Interface) Handshake(vpnIp iputil.VpnIp) { f.getOrHandshake(vpnIp) } diff --git a/iputil/packet.go b/iputil/packet.go new file mode 100644 index 0000000..74ae37f --- /dev/null +++ b/iputil/packet.go @@ -0,0 +1,211 @@ +package iputil + +import ( + "encoding/binary" + + "golang.org/x/net/ipv4" +) + +func CreateRejectPacket(packet []byte, out []byte) []byte { + // TODO ipv4 only, need to fix when inside supports ipv6 + switch packet[9] { + case 6: // tcp + return ipv4CreateRejectTCPPacket(packet, out) + default: + return 
ipv4CreateRejectICMPPacket(packet, out) + } +} + +func ipv4CreateRejectICMPPacket(packet []byte, out []byte) []byte { + ihl := int(packet[0]&0x0f) << 2 + + // ICMP reply includes header and first 8 bytes of the packet + packetLen := len(packet) + if packetLen > ihl+8 { + packetLen = ihl + 8 + } + + outLen := ipv4.HeaderLen + 8 + packetLen + + out = out[:(outLen)] + + ipHdr := out[0:ipv4.HeaderLen] + ipHdr[0] = ipv4.Version<<4 | (ipv4.HeaderLen >> 2) // version, ihl + ipHdr[1] = 0 // DSCP, ECN + binary.BigEndian.PutUint16(ipHdr[2:], uint16(ipv4.HeaderLen+8+packetLen)) // Total Length + + ipHdr[4] = 0 // id + ipHdr[5] = 0 // . + ipHdr[6] = 0 // flags, fragment offset + ipHdr[7] = 0 // . + ipHdr[8] = 64 // TTL + ipHdr[9] = 1 // protocol (icmp) + ipHdr[10] = 0 // checksum + ipHdr[11] = 0 // . + + // Swap dest / src IPs + copy(ipHdr[12:16], packet[16:20]) + copy(ipHdr[16:20], packet[12:16]) + + // Calculate checksum + binary.BigEndian.PutUint16(ipHdr[10:], tcpipChecksum(ipHdr, 0)) + + // ICMP Destination Unreachable + icmpOut := out[ipv4.HeaderLen:] + icmpOut[0] = 3 // type (Destination unreachable) + icmpOut[1] = 3 // code (Port unreachable error) + icmpOut[2] = 0 // checksum + icmpOut[3] = 0 // . + icmpOut[4] = 0 // unused + icmpOut[5] = 0 // . + icmpOut[6] = 0 // . + icmpOut[7] = 0 // . + + // Copy original IP header and first 8 bytes as body + copy(icmpOut[8:], packet[:packetLen]) + + // Calculate checksum + binary.BigEndian.PutUint16(icmpOut[2:], tcpipChecksum(icmpOut, 0)) + + return out +} + +func ipv4CreateRejectTCPPacket(packet []byte, out []byte) []byte { + const tcpLen = 20 + + ihl := int(packet[0]&0x0f) << 2 + outLen := ipv4.HeaderLen + tcpLen + + out = out[:(outLen)] + + ipHdr := out[0:ipv4.HeaderLen] + ipHdr[0] = ipv4.Version<<4 | (ipv4.HeaderLen >> 2) // version, ihl + ipHdr[1] = 0 // DSCP, ECN + binary.BigEndian.PutUint16(ipHdr[2:], uint16(outLen)) // Total Length + ipHdr[4] = 0 // id + ipHdr[5] = 0 // . 
+ ipHdr[6] = 0 // flags, fragment offset + ipHdr[7] = 0 // . + ipHdr[8] = 64 // TTL + ipHdr[9] = 6 // protocol (tcp) + ipHdr[10] = 0 // checksum + ipHdr[11] = 0 // . + + // Swap dest / src IPs + copy(ipHdr[12:16], packet[16:20]) + copy(ipHdr[16:20], packet[12:16]) + + // Calculate checksum + binary.BigEndian.PutUint16(ipHdr[10:], tcpipChecksum(ipHdr, 0)) + + // TCP RST + tcpIn := packet[ihl:] + var ackSeq, seq uint32 + outFlags := byte(0b00000100) // RST + + // Set seq and ackSeq based on how iptables/netfilter does it in Linux: + // - https://github.com/torvalds/linux/blob/v5.19/net/ipv4/netfilter/nf_reject_ipv4.c#L193-L221 + inAck := tcpIn[13]&0b00010000 != 0 + if inAck { + seq = binary.BigEndian.Uint32(tcpIn[8:]) + } else { + inSyn := uint32((tcpIn[13] & 0b00000010) >> 1) + inFin := uint32(tcpIn[13] & 0b00000001) + // seq from the packet + syn + fin + tcp segment length + ackSeq = binary.BigEndian.Uint32(tcpIn[4:]) + inSyn + inFin + uint32(len(tcpIn)) - uint32(tcpIn[12]>>4)<<2 + outFlags |= 0b00010000 // ACK + } + + tcpOut := out[ipv4.HeaderLen:] + // Swap dest / src ports + copy(tcpOut[0:2], tcpIn[2:4]) + copy(tcpOut[2:4], tcpIn[0:2]) + binary.BigEndian.PutUint32(tcpOut[4:], seq) + binary.BigEndian.PutUint32(tcpOut[8:], ackSeq) + tcpOut[12] = (tcpLen >> 2) << 4 // data offset, reserved, NS + tcpOut[13] = outFlags // CWR, ECE, URG, ACK, PSH, RST, SYN, FIN + tcpOut[14] = 0 // window size + tcpOut[15] = 0 // . + tcpOut[16] = 0 // checksum + tcpOut[17] = 0 // . + tcpOut[18] = 0 // URG Pointer + tcpOut[19] = 0 // . 
+ + // Calculate checksum + csum := ipv4PseudoheaderChecksum(ipHdr[12:16], ipHdr[16:20], 6, tcpLen) + binary.BigEndian.PutUint16(tcpOut[16:], tcpipChecksum(tcpOut, csum)) + + return out +} + +func CreateICMPEchoResponse(packet, out []byte) []byte { + // Return early if this is not a simple ICMP Echo Request + //TODO: make constants out of these + if !(len(packet) >= 28 && len(packet) <= 9001 && packet[0] == 0x45 && packet[9] == 0x01 && packet[20] == 0x08) { + return nil + } + + // We don't support fragmented packets + if packet[7] != 0 || (packet[6]&0x2F != 0) { + return nil + } + + out = out[:len(packet)] + + copy(out, packet) + + // Swap dest / src IPs and recalculate checksum + ipv4 := out[0:20] + copy(ipv4[12:16], packet[16:20]) + copy(ipv4[16:20], packet[12:16]) + ipv4[10] = 0 + ipv4[11] = 0 + binary.BigEndian.PutUint16(ipv4[10:], tcpipChecksum(ipv4, 0)) + + // Change type to ICMP Echo Reply and recalculate checksum + icmp := out[20:] + icmp[0] = 0 + icmp[2] = 0 + icmp[3] = 0 + binary.BigEndian.PutUint16(icmp[2:], tcpipChecksum(icmp, 0)) + + return out +} + +// calculates the TCP/IP checksum defined in rfc1071. The passed-in +// csum is any initial checksum data that's already been computed. +// +// based on: +// - https://github.com/google/gopacket/blob/v1.1.19/layers/tcpip.go#L50-L70 +func tcpipChecksum(data []byte, csum uint32) uint16 { + // to handle odd lengths, we loop to length - 1, incrementing by 2, then + // handle the last byte specifically by checking against the original + // length. + length := len(data) - 1 + for i := 0; i < length; i += 2 { + // For our test packet, doing this manually is about 25% faster + // (740 ns vs. 1000ns) than doing it by calling binary.BigEndian.Uint16. 
+ csum += uint32(data[i]) << 8 + csum += uint32(data[i+1]) + } + if len(data)%2 == 1 { + csum += uint32(data[length]) << 8 + } + for csum > 0xffff { + csum = (csum >> 16) + (csum & 0xffff) + } + return ^uint16(csum) +} + +// based on: +// - https://github.com/google/gopacket/blob/v1.1.19/layers/tcpip.go#L26-L35 +func ipv4PseudoheaderChecksum(src, dst []byte, proto, length uint32) (csum uint32) { + csum += (uint32(src[0]) + uint32(src[2])) << 8 + csum += uint32(src[1]) + uint32(src[3]) + csum += (uint32(dst[0]) + uint32(dst[2])) << 8 + csum += uint32(dst[1]) + uint32(dst[3]) + csum += proto + csum += length & 0xffff + csum += length >> 16 + return csum +} diff --git a/outside.go b/outside.go index 605325d..8fa90be 100644 --- a/outside.go +++ b/outside.go @@ -399,6 +399,7 @@ func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out dropReason := f.firewall.Drop(out, *fwPacket, true, hostinfo, f.caPool, localCache) if dropReason != nil { + f.rejectOutside(out, hostinfo.ConnectionState, hostinfo, nb, out, q) if f.l.Level >= logrus.DebugLevel { hostinfo.logger(f.l).WithField("fwPacket", fwPacket). WithField("reason", dropReason). 
diff --git a/overlay/tun_disabled.go b/overlay/tun_disabled.go index b7f7273..e1e4ede 100644 --- a/overlay/tun_disabled.go +++ b/overlay/tun_disabled.go @@ -1,7 +1,6 @@ package overlay import ( - "encoding/binary" "fmt" "io" "net" @@ -75,38 +74,15 @@ func (t *disabledTun) Read(b []byte) (int, error) { } func (t *disabledTun) handleICMPEchoRequest(b []byte) bool { - // Return early if this is not a simple ICMP Echo Request - //TODO: make constants out of these - if !(len(b) >= 28 && len(b) <= 9001 && b[0] == 0x45 && b[9] == 0x01 && b[20] == 0x08) { + out := make([]byte, len(b)) + out = iputil.CreateICMPEchoResponse(b, out) + if out == nil { return false } - // We don't support fragmented packets - if b[7] != 0 || (b[6]&0x2F != 0) { - return false - } - - buf := make([]byte, len(b)) - copy(buf, b) - - // Swap dest / src IPs and recalculate checksum - ipv4 := buf[0:20] - copy(ipv4[12:16], b[16:20]) - copy(ipv4[16:20], b[12:16]) - ipv4[10] = 0 - ipv4[11] = 0 - binary.BigEndian.PutUint16(ipv4[10:], ipChecksum(ipv4)) - - // Change type to ICMP Echo Reply and recalculate checksum - icmp := buf[20:] - icmp[0] = 0 - icmp[2] = 0 - icmp[3] = 0 - binary.BigEndian.PutUint16(icmp[2:], ipChecksum(icmp)) - // attempt to write it, but don't block select { - case t.read <- buf: + case t.read <- out: default: t.l.Debugf("tun_disabled: dropped ICMP Echo Reply response") } @@ -154,22 +130,3 @@ func (p prettyPacket) String() string { return s.String() } - -func ipChecksum(b []byte) uint16 { - var c uint32 - sz := len(b) - 1 - - for i := 0; i < sz; i += 2 { - c += uint32(b[i]) << 8 - c += uint32(b[i+1]) - } - if sz%2 == 0 { - c += uint32(b[sz]) << 8 - } - - for (c >> 16) > 0 { - c = (c & 0xffff) + (c >> 16) - } - - return ^uint16(c) -}