2019-11-19 17:00:20 +00:00
package nebula
import (
2024-07-31 10:18:56 -05:00
"net/netip"
2019-11-19 17:00:20 +00:00
"github.com/sirupsen/logrus"
2021-11-03 20:54:04 -05:00
"github.com/slackhq/nebula/firewall"
"github.com/slackhq/nebula/header"
"github.com/slackhq/nebula/iputil"
2023-04-05 11:08:23 -04:00
"github.com/slackhq/nebula/noiseutil"
2025-03-24 23:15:59 +01:00
"github.com/slackhq/nebula/routing"
2019-11-19 17:00:20 +00:00
)
2021-11-03 20:54:04 -05:00
func ( f * Interface ) consumeInsidePacket ( packet [ ] byte , fwPacket * firewall . Packet , nb , out [ ] byte , q int , localCache firewall . ConntrackCache ) {
2019-11-19 17:00:20 +00:00
err := newPacket ( packet , false , fwPacket )
if err != nil {
2022-09-01 09:44:58 -05:00
if f . l . Level >= logrus . DebugLevel {
f . l . WithField ( "packet" , packet ) . Debugf ( "Error while validating outbound packet: %s" , err )
}
2019-11-19 17:00:20 +00:00
return
}
// Ignore local broadcast packets
2025-03-06 11:28:26 -06:00
if f . dropLocalBroadcast {
2025-04-18 12:37:20 -04:00
if f . myBroadcastAddrsTable . Contains ( fwPacket . RemoteAddr ) {
2025-03-06 11:28:26 -06:00
return
}
2019-11-19 17:00:20 +00:00
}
2025-04-18 12:37:20 -04:00
if f . myVpnAddrsTable . Contains ( fwPacket . RemoteAddr ) {
2022-06-27 14:36:10 -04:00
// Immediately forward packets from self to self.
2023-01-23 16:51:54 -05:00
// This should only happen on Darwin-based and FreeBSD hosts, which
2025-03-06 11:28:26 -06:00
// routes packets from the Nebula addr to the Nebula addr through the Nebula
2023-01-23 16:51:54 -05:00
// TUN device.
2022-06-27 14:36:10 -04:00
if immediatelyForwardToSelf {
_ , err := f . readers [ q ] . Write ( packet )
if err != nil {
f . l . WithError ( err ) . Error ( "Failed to forward to tun" )
}
}
// Otherwise, drop. On linux, we should never see these packets - Linux
2025-03-06 11:28:26 -06:00
// routes packets from the nebula addr to the nebula addr through the loopback device.
2020-02-21 16:49:54 -08:00
return
}
2024-07-31 10:18:56 -05:00
// Ignore multicast packets
2025-03-06 11:28:26 -06:00
if f . dropMulticast && fwPacket . RemoteAddr . IsMulticast ( ) {
2019-11-19 17:00:20 +00:00
return
}
2025-03-24 23:15:59 +01:00
hostinfo , ready := f . getOrHandshakeConsiderRouting ( fwPacket , func ( hh * HandshakeHostInfo ) {
2023-11-02 16:53:59 -05:00
hh . cachePacket ( f . l , header . Message , 0 , packet , f . sendMessageNow , f . cachedPacketMetrics )
2023-08-21 18:51:45 -05:00
} )
2020-08-04 22:59:04 -04:00
if hostinfo == nil {
2023-03-13 15:08:40 -04:00
f . rejectInside ( packet , out , q )
2021-03-26 09:46:30 -05:00
if f . l . Level >= logrus . DebugLevel {
2025-03-06 11:28:26 -06:00
f . l . WithField ( "vpnAddr" , fwPacket . RemoteAddr ) .
2020-08-04 22:59:04 -04:00
WithField ( "fwPacket" , fwPacket ) .
2025-03-06 11:28:26 -06:00
Debugln ( "dropping outbound packet, vpnAddr not in our vpn networks or in unsafe networks" )
2020-08-04 22:59:04 -04:00
}
return
}
2023-08-21 18:51:45 -05:00
if ! ready {
return
2019-11-19 17:00:20 +00:00
}
2024-04-11 21:44:22 -05:00
dropReason := f . firewall . Drop ( * fwPacket , false , hostinfo , f . pki . GetCAPool ( ) , localCache )
2020-04-10 10:57:21 -07:00
if dropReason == nil {
2024-07-31 10:18:56 -05:00
f . sendNoMetrics ( header . Message , 0 , hostinfo . ConnectionState , hostinfo , netip . AddrPort { } , packet , nb , out , q )
2019-11-19 17:00:20 +00:00
2023-03-13 15:08:40 -04:00
} else {
f . rejectInside ( packet , out , q )
if f . l . Level >= logrus . DebugLevel {
hostinfo . logger ( f . l ) .
WithField ( "fwPacket" , fwPacket ) .
WithField ( "reason" , dropReason ) .
Debugln ( "dropping outbound packet" )
}
}
}
func ( f * Interface ) rejectInside ( packet [ ] byte , out [ ] byte , q int ) {
if ! f . firewall . InSendReject {
return
2019-11-19 17:00:20 +00:00
}
2023-03-13 15:08:40 -04:00
out = iputil . CreateRejectPacket ( packet , out )
2023-11-13 10:43:51 -08:00
if len ( out ) == 0 {
return
}
2023-03-13 15:08:40 -04:00
_ , err := f . readers [ q ] . Write ( out )
if err != nil {
f . l . WithError ( err ) . Error ( "Failed to write to tun" )
}
}
func ( f * Interface ) rejectOutside ( packet [ ] byte , ci * ConnectionState , hostinfo * HostInfo , nb , out [ ] byte , q int ) {
if ! f . firewall . OutSendReject {
return
}
2023-11-13 10:43:51 -08:00
out = iputil . CreateRejectPacket ( packet , out )
if len ( out ) == 0 {
return
}
if len ( out ) > iputil . MaxRejectPacketSize {
if f . l . GetLevel ( ) >= logrus . InfoLevel {
f . l .
WithField ( "packet" , packet ) .
WithField ( "outPacket" , out ) .
Info ( "rejectOutside: packet too big, not sending" )
}
return
}
2024-07-31 10:18:56 -05:00
f . sendNoMetrics ( header . Message , 0 , ci , hostinfo , netip . AddrPort { } , out , nb , packet , q )
2019-11-19 17:00:20 +00:00
}
2025-03-24 23:15:59 +01:00
// Handshake will attempt to initiate a tunnel with the provided vpn address if it is within our vpn networks. This is a no-op if the tunnel is already established or being established
2025-03-06 11:28:26 -06:00
func ( f * Interface ) Handshake ( vpnAddr netip . Addr ) {
2025-03-24 23:15:59 +01:00
f . getOrHandshakeNoRouting ( vpnAddr , nil )
2022-06-21 14:35:23 -04:00
}
2025-03-24 23:15:59 +01:00
// getOrHandshakeNoRouting returns nil if the vpnAddr is not routable.
2023-08-21 18:51:45 -05:00
// If the 2nd return var is false then the hostinfo is not ready to be used in a tunnel
2025-03-24 23:15:59 +01:00
func ( f * Interface ) getOrHandshakeNoRouting ( vpnAddr netip . Addr , cacheCallback func ( * HandshakeHostInfo ) ) ( * HostInfo , bool ) {
2025-04-18 12:37:20 -04:00
if f . myVpnNetworksTable . Contains ( vpnAddr ) {
2025-03-24 23:15:59 +01:00
return f . handshakeManager . GetOrHandshake ( vpnAddr , cacheCallback )
}
return nil , false
}
// getOrHandshakeConsiderRouting will try to find the HostInfo to handle this packet, starting a handshake if necessary.
// If the 2nd return var is false then the hostinfo is not ready to be used in a tunnel.
func ( f * Interface ) getOrHandshakeConsiderRouting ( fwPacket * firewall . Packet , cacheCallback func ( * HandshakeHostInfo ) ) ( * HostInfo , bool ) {
destinationAddr := fwPacket . RemoteAddr
hostinfo , ready := f . getOrHandshakeNoRouting ( destinationAddr , cacheCallback )
// Host is inside the mesh, no routing required
if hostinfo != nil {
return hostinfo , ready
}
gateways := f . inside . RoutesFor ( destinationAddr )
switch len ( gateways ) {
case 0 :
return nil , false
case 1 :
// Single gateway route
return f . handshakeManager . GetOrHandshake ( gateways [ 0 ] . Addr ( ) , cacheCallback )
default :
// Multi gateway route, perform ECMP categorization
gatewayAddr , balancingOk := routing . BalancePacket ( fwPacket , gateways )
if ! balancingOk {
// This happens if the gateway buckets were not calculated, this _should_ never happen
f . l . Error ( "Gateway buckets not calculated, fallback from ECMP to random routing. Please report this bug." )
}
var handshakeInfoForChosenGateway * HandshakeHostInfo
var hhReceiver = func ( hh * HandshakeHostInfo ) {
handshakeInfoForChosenGateway = hh
}
// Store the handshakeHostInfo for later.
// If this node is not reachable we will attempt other nodes, if none are reachable we will
// cache the packet for this gateway.
if hostinfo , ready = f . handshakeManager . GetOrHandshake ( gatewayAddr , hhReceiver ) ; ready {
return hostinfo , true
}
// It appears the selected gateway cannot be reached, find another gateway to fallback on.
// The current implementation breaks ECMP but that seems better than no connectivity.
// If ECMP is also required when a gateway is down then connectivity status
// for each gateway needs to be kept and the weights recalculated when they go up or down.
// This would also need to interact with unsafe_route updates through reloading the config or
// use of the use_system_route_table option
if f . l . Level >= logrus . DebugLevel {
f . l . WithField ( "destination" , destinationAddr ) .
WithField ( "originalGateway" , gatewayAddr ) .
Debugln ( "Calculated gateway for ECMP not available, attempting other gateways" )
2020-08-04 22:59:04 -04:00
}
2025-03-24 23:15:59 +01:00
for i := range gateways {
// Skip the gateway that failed previously
if gateways [ i ] . Addr ( ) == gatewayAddr {
continue
}
// We do not need the HandshakeHostInfo since we cache the packet in the originally chosen gateway
if hostinfo , ready = f . handshakeManager . GetOrHandshake ( gateways [ i ] . Addr ( ) , nil ) ; ready {
return hostinfo , true
}
}
// No gateways reachable, cache the packet in the originally chosen gateway
cacheCallback ( handshakeInfoForChosenGateway )
return hostinfo , false
2019-12-12 16:34:17 +00:00
}
2019-11-19 17:00:20 +00:00
}
2023-05-04 15:16:37 -05:00
func ( f * Interface ) sendMessageNow ( t header . MessageType , st header . MessageSubType , hostinfo * HostInfo , p , nb , out [ ] byte ) {
2021-11-03 20:54:04 -05:00
fp := & firewall . Packet { }
2019-11-19 17:00:20 +00:00
err := newPacket ( p , false , fp )
if err != nil {
2021-03-26 09:46:30 -05:00
f . l . Warnf ( "error while parsing outgoing packet for firewall check; %v" , err )
2019-11-19 17:00:20 +00:00
return
}
// check if packet is in outbound fw rules
2024-04-11 21:44:22 -05:00
dropReason := f . firewall . Drop ( * fp , false , hostinfo , f . pki . GetCAPool ( ) , nil )
2020-06-10 14:55:49 -07:00
if dropReason != nil {
2021-03-26 09:46:30 -05:00
if f . l . Level >= logrus . DebugLevel {
f . l . WithField ( "fwPacket" , fp ) .
2020-06-10 14:55:49 -07:00
WithField ( "reason" , dropReason ) .
Debugln ( "dropping cached packet" )
}
2019-11-19 17:00:20 +00:00
return
}
2024-07-31 10:18:56 -05:00
f . sendNoMetrics ( header . Message , st , hostinfo . ConnectionState , hostinfo , netip . AddrPort { } , p , nb , out , 0 )
2019-11-19 17:00:20 +00:00
}
2025-03-06 11:28:26 -06:00
// SendMessageToVpnAddr handles real addr:port lookup and sends to the current best known address for vpnAddr
func ( f * Interface ) SendMessageToVpnAddr ( t header . MessageType , st header . MessageSubType , vpnAddr netip . Addr , p , nb , out [ ] byte ) {
2025-03-24 23:15:59 +01:00
hostInfo , ready := f . getOrHandshakeNoRouting ( vpnAddr , func ( hh * HandshakeHostInfo ) {
2023-11-02 16:53:59 -05:00
hh . cachePacket ( f . l , t , st , p , f . SendMessageToHostInfo , f . cachedPacketMetrics )
2023-08-21 18:51:45 -05:00
} )
2020-08-04 22:59:04 -04:00
if hostInfo == nil {
2021-03-26 09:46:30 -05:00
if f . l . Level >= logrus . DebugLevel {
2025-03-06 11:28:26 -06:00
f . l . WithField ( "vpnAddr" , vpnAddr ) .
Debugln ( "dropping SendMessageToVpnAddr, vpnAddr not in our vpn networks or in unsafe routes" )
2020-08-04 22:59:04 -04:00
}
return
}
2019-11-19 17:00:20 +00:00
2023-08-21 18:51:45 -05:00
if ! ready {
return
2019-11-19 17:00:20 +00:00
}
2023-05-04 15:16:37 -05:00
f . SendMessageToHostInfo ( t , st , hostInfo , p , nb , out )
2019-11-19 17:00:20 +00:00
}
2023-05-04 15:16:37 -05:00
func ( f * Interface ) SendMessageToHostInfo ( t header . MessageType , st header . MessageSubType , hi * HostInfo , p , nb , out [ ] byte ) {
f . send ( t , st , hi . ConnectionState , hi , p , nb , out )
2022-06-21 14:35:23 -04:00
}
func ( f * Interface ) send ( t header . MessageType , st header . MessageSubType , ci * ConnectionState , hostinfo * HostInfo , p , nb , out [ ] byte ) {
f . messageMetrics . Tx ( t , st , 1 )
2024-07-31 10:18:56 -05:00
f . sendNoMetrics ( t , st , ci , hostinfo , netip . AddrPort { } , p , nb , out , 0 )
2019-11-19 17:00:20 +00:00
}
2024-07-31 10:18:56 -05:00
func ( f * Interface ) sendTo ( t header . MessageType , st header . MessageSubType , ci * ConnectionState , hostinfo * HostInfo , remote netip . AddrPort , p , nb , out [ ] byte ) {
2020-06-26 13:45:48 -04:00
f . messageMetrics . Tx ( t , st , 1 )
2021-02-25 15:01:14 -05:00
f . sendNoMetrics ( t , st , ci , hostinfo , remote , p , nb , out , 0 )
2020-06-26 13:45:48 -04:00
}
2023-08-21 18:51:45 -05:00
// SendVia sends a payload through a Relay tunnel. No authentication or encryption is done
2022-06-21 14:35:23 -04:00
// to the payload for the ultimate target host, making this a useful method for sending
// handshake messages to peers through relay tunnels.
// via is the HostInfo through which the message is relayed.
// ad is the plaintext data to authenticate, but not encrypt
// nb is a buffer used to store the nonce value, re-used for performance reasons.
// out is a buffer used to store the result of the Encrypt operation
// q indicates which writer to use to send the packet.
2023-04-07 14:28:37 -04:00
func ( f * Interface ) SendVia ( via * HostInfo ,
relay * Relay ,
2022-06-21 14:35:23 -04:00
ad ,
nb ,
out [ ] byte ,
nocopy bool ,
) {
2023-04-05 11:08:23 -04:00
if noiseutil . EncryptLockNeeded {
// NOTE: for goboring AESGCMTLS we need to lock because of the nonce check
via . ConnectionState . writeLock . Lock ( )
}
2022-10-31 13:37:41 -04:00
c := via . ConnectionState . messageCounter . Add ( 1 )
2022-06-21 14:35:23 -04:00
out = header . Encode ( out , header . Version , header . Message , header . MessageRelay , relay . RemoteIndex , c )
2025-07-03 09:58:37 -05:00
f . connectionManager . Out ( via )
2022-06-21 14:35:23 -04:00
// Authenticate the header and payload, but do not encrypt for this message type.
// The payload consists of the inner, unencrypted Nebula header, as well as the end-to-end encrypted payload.
if len ( out ) + len ( ad ) + via . ConnectionState . eKey . Overhead ( ) > cap ( out ) {
2023-04-05 11:08:23 -04:00
if noiseutil . EncryptLockNeeded {
via . ConnectionState . writeLock . Unlock ( )
}
2022-06-21 14:35:23 -04:00
via . logger ( f . l ) .
WithField ( "outCap" , cap ( out ) ) .
WithField ( "payloadLen" , len ( ad ) ) .
WithField ( "headerLen" , len ( out ) ) .
WithField ( "cipherOverhead" , via . ConnectionState . eKey . Overhead ( ) ) .
Error ( "SendVia out buffer not large enough for relay" )
return
}
// The header bytes are written to the 'out' slice; Grow the slice to hold the header and associated data payload.
offset := len ( out )
out = out [ : offset + len ( ad ) ]
// In one call path, the associated data _is_ already stored in out. In other call paths, the associated data must
// be copied into 'out'.
if ! nocopy {
copy ( out [ offset : ] , ad )
}
var err error
out , err = via . ConnectionState . eKey . EncryptDanger ( out , out , nil , c , nb )
2023-04-05 11:08:23 -04:00
if noiseutil . EncryptLockNeeded {
via . ConnectionState . writeLock . Unlock ( )
}
2022-06-21 14:35:23 -04:00
if err != nil {
via . logger ( f . l ) . WithError ( err ) . Info ( "Failed to EncryptDanger in sendVia" )
return
}
err = f . writers [ 0 ] . WriteTo ( out , via . remote )
if err != nil {
via . logger ( f . l ) . WithError ( err ) . Info ( "Failed to WriteTo in sendVia" )
}
2023-05-04 15:16:37 -05:00
f . connectionManager . RelayUsed ( relay . LocalIndex )
2022-06-21 14:35:23 -04:00
}
2024-07-31 10:18:56 -05:00
func ( f * Interface ) sendNoMetrics ( t header . MessageType , st header . MessageSubType , ci * ConnectionState , hostinfo * HostInfo , remote netip . AddrPort , p , nb , out [ ] byte , q int ) {
2019-11-19 17:00:20 +00:00
if ci . eKey == nil {
2021-04-14 13:50:09 -05:00
return
2019-11-19 17:00:20 +00:00
}
2024-07-31 10:18:56 -05:00
useRelay := ! remote . IsValid ( ) && ! hostinfo . remote . IsValid ( )
2022-06-21 14:35:23 -04:00
fullOut := out
if useRelay {
if len ( out ) < header . Len {
// out always has a capacity of mtu, but not always a length greater than the header.Len.
// Grow it to make sure the next operation works.
out = out [ : header . Len ]
}
// Save a header's worth of data at the front of the 'out' buffer.
out = out [ header . Len : ]
}
2019-11-19 17:00:20 +00:00
2023-04-05 11:08:23 -04:00
if noiseutil . EncryptLockNeeded {
// NOTE: for goboring AESGCMTLS we need to lock because of the nonce check
ci . writeLock . Lock ( )
}
2022-10-31 13:37:41 -04:00
c := ci . messageCounter . Add ( 1 )
2019-11-19 17:00:20 +00:00
//l.WithField("trace", string(debug.Stack())).Error("out Header ", &Header{Version, t, st, 0, hostinfo.remoteIndexId, c}, p)
2021-11-03 20:54:04 -05:00
out = header . Encode ( out , header . Version , t , st , hostinfo . remoteIndexId , c )
2025-07-03 09:58:37 -05:00
f . connectionManager . Out ( hostinfo )
2019-11-19 17:00:20 +00:00
2021-03-01 19:06:01 -06:00
// Query our LH if we haven't since the last time we've been rebound, this will cause the remote to punch against
2025-03-06 11:28:26 -06:00
// all our addrs and enable a faster roaming.
2021-11-03 20:54:04 -05:00
if t != header . CloseTunnel && hostinfo . lastRebindCount != f . rebindCount {
2021-03-01 19:06:01 -06:00
//NOTE: there is an update hole if a tunnel isn't used and exactly 256 rebinds occur before the tunnel is
// finally used again. This tunnel would eventually be torn down and recreated if this action didn't help.
2025-03-06 11:28:26 -06:00
f . lightHouse . QueryServer ( hostinfo . vpnAddrs [ 0 ] )
2021-03-01 19:06:01 -06:00
hostinfo . lastRebindCount = f . rebindCount
2021-03-26 09:46:30 -05:00
if f . l . Level >= logrus . DebugLevel {
2025-03-06 11:28:26 -06:00
f . l . WithField ( "vpnAddrs" , hostinfo . vpnAddrs ) . Debug ( "Lighthouse update triggered for punch due to rebind counter" )
2021-03-01 19:06:01 -06:00
}
}
2022-06-21 14:35:23 -04:00
var err error
2019-11-19 17:00:20 +00:00
out , err = ci . eKey . EncryptDanger ( out , out , p , c , nb )
2023-04-05 11:08:23 -04:00
if noiseutil . EncryptLockNeeded {
ci . writeLock . Unlock ( )
}
2019-11-19 17:00:20 +00:00
if err != nil {
2021-03-26 09:46:30 -05:00
hostinfo . logger ( f . l ) . WithError ( err ) .
2019-11-19 17:00:20 +00:00
WithField ( "udpAddr" , remote ) . WithField ( "counter" , c ) .
2021-03-05 21:18:33 -05:00
WithField ( "attemptedCounter" , c ) .
2019-11-19 17:00:20 +00:00
Error ( "Failed to encrypt outgoing packet" )
2021-04-14 13:50:09 -05:00
return
2019-11-19 17:00:20 +00:00
}
2024-07-31 10:18:56 -05:00
if remote . IsValid ( ) {
2022-06-21 14:35:23 -04:00
err = f . writers [ q ] . WriteTo ( out , remote )
if err != nil {
hostinfo . logger ( f . l ) . WithError ( err ) .
WithField ( "udpAddr" , remote ) . Error ( "Failed to write outgoing packet" )
}
2024-07-31 10:18:56 -05:00
} else if hostinfo . remote . IsValid ( ) {
2022-06-21 14:35:23 -04:00
err = f . writers [ q ] . WriteTo ( out , hostinfo . remote )
if err != nil {
hostinfo . logger ( f . l ) . WithError ( err ) .
WithField ( "udpAddr" , remote ) . Error ( "Failed to write outgoing packet" )
}
} else {
// Try to send via a relay
for _ , relayIP := range hostinfo . relayState . CopyRelayIps ( ) {
2025-03-06 11:28:26 -06:00
relayHostInfo , relay , err := f . hostMap . QueryVpnAddrsRelayFor ( hostinfo . vpnAddrs , relayIP )
2022-06-21 14:35:23 -04:00
if err != nil {
2023-05-04 15:16:37 -05:00
hostinfo . relayState . DeleteRelay ( relayIP )
2023-03-30 15:07:31 -05:00
hostinfo . logger ( f . l ) . WithField ( "relay" , relayIP ) . WithError ( err ) . Info ( "sendNoMetrics failed to find HostInfo" )
2022-06-21 14:35:23 -04:00
continue
}
f . SendVia ( relayHostInfo , relay , out , nb , fullOut [ : header . Len + len ( out ) ] , true )
break
}
2019-11-19 17:00:20 +00:00
}
}