Skip to content

Commit

Permalink
Implement packet sniffing with less overhead using tc-mirred
Browse files Browse the repository at this point in the history
We use packet sniffing based on an expensive AF-PACKET socket to learn app
IPs so that they can be reported to the user. Additionally, we capture
DNS packets to log DNS requests as part of flow logging. This can be done
more efficiently by mirroring DHCP, ARP and a few other protocols that
we are interested in into a dummy interface and capturing packets there.
This avoids cloning/copying every packet. Only those few we need
will be cloned in the kernel.

I think that the main problem is that AF-PACKET + BPF filter is only
optimized in the Linux kernel for locally delivered traffic. The filter
is applied before the packet is copied for delivery into the user-space
via AF-PACKET socket. This avoids copying packets which are not matched
by the filter. But in our case, we are interested in forwarded
traffic and therefore have to put the bridge into promiscuous mode for
packet sniffing. This then significantly degrades performance
because *every* packet forwarded by the bridge is skb_clone-d before
it reaches the BPF filter.

The solution is to use tc-filter and the ingress qdisc to mirror packets
that we are interested in (ICMP, DHCP, ARP, DNS) into a separate dummy
interface. Then we can run pcap on this dummy interface instead and avoid
putting the network instance bridge into promiscuous mode.

Signed-off-by: Milan Lenco <milan@zededa.com>
  • Loading branch information
milan-zededa authored and eriknordmark committed Nov 26, 2024
1 parent 2dfd31b commit cdc6a49
Show file tree
Hide file tree
Showing 14 changed files with 713 additions and 43 deletions.
1 change: 1 addition & 0 deletions pkg/pillar/cmd/zedrouter/networkinstance.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ func (z *zedrouter) getArgsForNIStateCollecting(niID uuid.UUID) (
br.NI = niID
br.BrNum = niStatus.BridgeNum
br.BrIfName = niStatus.BridgeName
br.MirrorIfName = niStatus.MirrorIfName
br.BrIfMAC = niStatus.BridgeMac
// Find all app instances that (actively) use this network.
apps := z.pubAppNetworkStatus.GetAll()
Expand Down
4 changes: 4 additions & 0 deletions pkg/pillar/cmd/zedrouter/zedrouter.go
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,10 @@ func (z *zedrouter) processNIReconcileStatus(recStatus nireconciler.NIReconcileS
niStatus.BridgeName = recStatus.BrIfName
changed = true
}
if niStatus.MirrorIfName != recStatus.MirrorIfName {
niStatus.MirrorIfName = recStatus.MirrorIfName
changed = true
}
if !recStatus.InProgress {
if niStatus.ChangeInProgress != types.ChangeInProgressTypeNone {
niStatus.ChangeInProgress = types.ChangeInProgressTypeNone
Expand Down
166 changes: 160 additions & 6 deletions pkg/pillar/nireconciler/linux_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,14 @@ import (
// | | +-----------------------------------------+ | |
// | | | BlackHole | | |
// | | | | | |
// | | | +-----------+ +---------+ | | |
// | | | | DummyIf | | Route | | | |
// | | | +-----------+ +---------+ | | |
// | | | +-------------------+ +---------+ | | |
// | | | | DummyIf | | Route | | | |
// | | | | (for packet drop) | +---------+ | | |
// | | | +-------------------+ | | |
// | | | | | |
// | | | +----------------+ +----------+ | | |
// | | | | IptablesRule | | IPRule | | | |
// | | | +----------------+ +----------+ | | |
// | | | +----------------+ +----------+ | | |
// | | | | IptablesRule | | IPRule | | | |
// | | | +----------------+ +----------+ | | |
// | | +-----------------------------------------+ | |
// | | | |
// | | +----------------------------------------------------+ | |
Expand Down Expand Up @@ -139,6 +140,20 @@ import (
// | | +----------------------------------------------------+ | |
// | | | |
// | | +----------------------------------------------------+ | |
// | | | Mirroring (for Switch NI) | | |
// | | | | | |
// | | | +------------------------+ | | |
// | | | | DummyIf | | | |
// | | | | (for mirrored packets) | | | |
// | | | +------------------------+ | | |
// | | | | | |
// | | | +---------------+ +---------------+ | | |
// | | | | TCIngress | | TCMirror | | | |
// | | | | (device port) | ... | (device port) | ... | | |
// | | | +---------------+ +---------------+ | | |
// | | +----------------------------------------------------+ | |
// | | | |
// | | +----------------------------------------------------+ | |
// | | | Services | | |
// | | | | | |
// | | | +---------------+ +-----------------------+ | | |
Expand Down Expand Up @@ -172,6 +187,12 @@ import (
// | | | | (for L2 NI) | | (for L2 NI) | | (eids) | | | |
// | | | +-------------+ +-------------+ +----------+ | | |
// | | | | | |
// | | | +-----------------+ +------------------+ | | |
// | | | | TCIngress | | TCMirror | | | |
// | | | | (for VIF) | | (for VIF) | ... | | |
// | | | | (for Switch NI) | | (for Switch NI) | ... | | |
// | | | +-----------------+ +------------------+ | | |
// | | | | | |
// | | | +--------------------------------------------+ | | |
// | | | | ACLs | | | |
// | | | | | | | |
Expand Down Expand Up @@ -235,6 +256,9 @@ const (
// L3SG : subgraph with configuration items for a given NI related to Layer3
// of the ISO/OSI model.
L3SG = "L3"
// NIMirroringSG : subgraph with items belonging to a given NI that collectively
// implement mirroring of some traffic for monitoring purposes.
NIMirroringSG = "Mirroring"
// NIServicesSG : subgraph with items belonging to a given NI that collectively
// provide various services for connected applications, such as DHCP, DNS, cloud-init
// metadata, etc.
Expand Down Expand Up @@ -535,6 +559,9 @@ func (r *LinuxNIReconciler) getIntendedNICfg(niID uuid.UUID) dg.Graph {
if !ni.bridge.IPConflict {
intendedCfg.PutSubGraph(r.getIntendedNIL2Cfg(niID))
intendedCfg.PutSubGraph(r.getIntendedNIL3Cfg(niID))
if ni.config.Type == types.NetworkInstanceTypeSwitch {
intendedCfg.PutSubGraph(r.getIntendedNIMirroring(niID))
}
intendedCfg.PutSubGraph(r.getIntendedNIServices(niID))
}
for _, app := range r.apps {
Expand Down Expand Up @@ -885,6 +912,116 @@ func (r *LinuxNIReconciler) getIntendedNIL3Cfg(niID uuid.UUID) dg.Graph {
return intendedL3Cfg
}

// getIntendedNIMirroring builds the "Mirroring" subgraph of the given network
// instance. A small portion of the traffic is mirrored for monitoring purposes
// (learning app IPs, logging DNS requests, etc.).
//
// The subgraph contains the dummy interface that receives the mirrored packets
// and, for every port of the NI bridge, a TCIngress item together with the
// TCMirror rules returned by getIntendedNIMirrorRules.
func (r *LinuxNIReconciler) getIntendedNIMirroring(niID uuid.UUID) dg.Graph {
	niState := r.nis[niID]
	mirroringSG := dg.New(dg.InitArgs{
		Name:        NIMirroringSG,
		Description: "Network instance packet mirroring (for DHCP, DNS, ICMP and ARP)",
	})
	// Destination of all mirrored packets.
	mirroringSG.PutItem(linux.DummyIf{IfName: niState.mirrorIfName}, nil)
	for _, devPort := range niState.bridge.Ports {
		physIfName := portPhysIfName(devPort)
		ingressIf := generic.NetworkIf{
			IfName:  physIfName,
			ItemRef: dg.Reference(generic.Port{IfName: physIfName}),
		}
		mirroringSG.PutItem(linux.TCIngress{NetIf: ingressIf}, nil)
		// Mirror rules are attached to the same interface as the TCIngress item.
		portRules := r.getIntendedNIMirrorRules(niID, ingressIf)
		for i := range portRules {
			mirroringSG.PutItem(portRules[i], nil)
		}
	}
	return mirroringSG
}

// getIntendedNIMirrorRules returns the TCMirror rules that mirror selected
// traffic from fromNetIf into the mirror (dummy) interface of the network
// instance niID.
//
// Rules are returned in ascending priority order:
//   - 1, 2: DHCP and DHCPv6 replies,
//   - 3-6:  DNS replies over UDP/TCP and IPv4/IPv6 (only with flow logging enabled),
//   - 7:    all ARP packets,
//   - 8:    ICMPv6 Neighbor Solicitation messages.
func (r *LinuxNIReconciler) getIntendedNIMirrorRules(niID uuid.UUID,
	fromNetIf generic.NetworkIf) []linux.TCMirror {
	var mirrorRules []linux.TCMirror
	niState := r.nis[niID]
	dstIf := generic.NetworkIf{
		IfName:  niState.mirrorIfName,
		ItemRef: dg.Reference(linux.DummyIf{IfName: niState.mirrorIfName}),
	}
	// Protocol numbers (https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml)
	var (
		protoTCP    uint8 = 6
		protoUDP    uint8 = 17
		protoICMPv6 uint8 = 58
	)
	// ICMPv6 message type of Neighbor Solicitation.
	var neighSolicitType uint8 = 135

	// addRule completes the rule with the source and destination interfaces,
	// which are the same for every rule, and appends it to the result.
	addRule := func(rule linux.TCMirror) {
		rule.FromNetIf = fromNetIf
		rule.ToNetIf = dstIf
		mirrorRules = append(mirrorRules, rule)
	}

	// Rule 1: mirror DHCP replies.
	addRule(linux.TCMirror{
		RulePriority:      1,
		Protocol:          linux.TCMatchProtoIPv4,
		TransportProtocol: &protoUDP,
		TransportSrcPort:  67,
	})
	// Rule 2: mirror DHCPv6 replies.
	addRule(linux.TCMirror{
		RulePriority:      2,
		Protocol:          linux.TCMatchProtoIPv6,
		TransportProtocol: &protoUDP,
		TransportSrcPort:  547,
	})
	// Rules 3-6: mirror DNS replies.
	// Configured only if flow logging is enabled.
	if niState.config.EnableFlowlog {
		addRule(linux.TCMirror{
			RulePriority:      3,
			Protocol:          linux.TCMatchProtoIPv4,
			TransportProtocol: &protoUDP,
			TransportSrcPort:  53,
		})
		addRule(linux.TCMirror{
			RulePriority:      4,
			Protocol:          linux.TCMatchProtoIPv4,
			TransportProtocol: &protoTCP,
			TransportSrcPort:  53,
		})
		addRule(linux.TCMirror{
			RulePriority:      5,
			Protocol:          linux.TCMatchProtoIPv6,
			TransportProtocol: &protoUDP,
			TransportSrcPort:  53,
		})
		addRule(linux.TCMirror{
			RulePriority:      6,
			Protocol:          linux.TCMatchProtoIPv6,
			TransportProtocol: &protoTCP,
			TransportSrcPort:  53,
		})
	}
	// Rule 7: mirror all ARP packets.
	addRule(linux.TCMirror{
		RulePriority: 7,
		Protocol:     linux.TCMatchProtoARP,
	})
	// Rule 8: mirror ICMPv6 Neighbor Solicitation messages.
	addRule(linux.TCMirror{
		RulePriority:      8,
		Protocol:          linux.TCMatchProtoIPv6,
		TransportProtocol: &protoICMPv6,
		ICMPType:          &neighSolicitType,
	})
	return mirrorRules
}

func (r *LinuxNIReconciler) getIntendedNIServices(niID uuid.UUID) dg.Graph {
graphArgs := dg.InitArgs{
Name: NIServicesSG,
Expand Down Expand Up @@ -1385,6 +1522,16 @@ func (r *LinuxNIReconciler) getIntendedAppConnCfg(niID uuid.UUID,
VLANConfig: vlanConfig,
}, nil)
}
// Mirror small portion of the traffic for monitoring
// (learning app IPs, logging DNS requests, etc.).
vifRef := generic.NetworkIf{
IfName: vif.hostIfName,
ItemRef: dg.Reference(linux.VIF{HostIfName: vif.hostIfName}),
}
intendedAppConnCfg.PutItem(linux.TCIngress{NetIf: vifRef}, nil)
for _, mirrorRule := range r.getIntendedNIMirrorRules(niID, vifRef) {
intendedAppConnCfg.PutItem(mirrorRule, nil)
}
}
// Create ipset with all the addresses from the DNSNameToIPList plus the VIF IP itself.
var ips []net.IP
Expand Down Expand Up @@ -1447,6 +1594,13 @@ func (r *LinuxNIReconciler) generateBridgeIfName(
return brIfName, nil
}

// mirrorIfName returns the name of the dummy interface into which ARP, ICMP,
// DNS and DHCP packets are mirrored for monitoring purposes. The name is the
// bridge interface name with a "-m" suffix appended.
//
// Linux limits interface names to 15 characters, hence the short suffix.
// Note that this function does not truncate or validate the result; it is
// exactly len(brIfName)+2 characters long.
func (r *LinuxNIReconciler) mirrorIfName(brIfName string) string {
	const mirrorSuffix = "-m"
	return brIfName + mirrorSuffix
}

// generateVifHostIfName returns the host-side interface name of a VIF:
// vifIfNamePrefix followed by "<vifNum>x<appNum>".
func (r *LinuxNIReconciler) generateVifHostIfName(vifNum, appNum int) string {
	const nameFormat = "%s%dx%d"
	return fmt.Sprintf(nameFormat, vifIfNamePrefix, vifNum, appNum)
}
Expand Down
52 changes: 37 additions & 15 deletions pkg/pillar/nireconciler/linux_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,12 @@ const (
)

type niInfo struct {
config types.NetworkInstanceConfig
bridge NIBridge
brIfName string
deleted bool
status NIReconcileStatus
config types.NetworkInstanceConfig
bridge NIBridge
brIfName string
mirrorIfName string
deleted bool
status NIReconcileStatus
}

type appInfo struct {
Expand Down Expand Up @@ -629,13 +630,14 @@ func (r *LinuxNIReconciler) updateNIStatus(
intSG := r.intendedState.SubGraph(sgName)
inProgress, failedItems := r.getSubgraphState(intSG, currSG, false)
niStatus = NIReconcileStatus{
NI: niID,
Deleted: deleted,
BrIfName: brIfName,
BrIfIndex: brIfIndex,
InProgress: inProgress,
FailedItems: failedItems,
Routes: r.getNIRouteInfo(niID),
NI: niID,
Deleted: deleted,
BrIfName: brIfName,
BrIfIndex: brIfIndex,
MirrorIfName: niInfo.mirrorIfName,
InProgress: inProgress,
FailedItems: failedItems,
Routes: r.getNIRouteInfo(niID),
}
if !niInfo.status.Equal(niStatus) {
changed = true
Expand Down Expand Up @@ -815,10 +817,17 @@ func (r *LinuxNIReconciler) AddNI(ctx context.Context,
if err != nil {
return niStatus, err
}
// Mirroring for efficient monitoring purposes is only implemented for the Switch
// network instance.
var mirrorIfName string
if niConfig.Type == types.NetworkInstanceTypeSwitch {
mirrorIfName = r.mirrorIfName(brIfName)
}
r.nis[niID] = &niInfo{
config: niConfig,
bridge: br,
brIfName: brIfName,
config: niConfig,
bridge: br,
brIfName: brIfName,
mirrorIfName: mirrorIfName,
}
reconcileReason := fmt.Sprintf("adding new NI (%v)", niID)
// Rebuild and reconcile also global config to update the set of intended/current
Expand Down Expand Up @@ -855,6 +864,9 @@ func (r *LinuxNIReconciler) UpdateNI(ctx context.Context,
return niStatus, err
}
r.nis[niID].brIfName = brIfName
if niConfig.Type == types.NetworkInstanceTypeSwitch {
r.nis[niID].mirrorIfName = r.mirrorIfName(brIfName)
}
reconcileReason := fmt.Sprintf("updating NI (%v)", niID)
// Get the current state of external items to be used by NI.
r.updateCurrentNIState(niID)
Expand Down Expand Up @@ -1130,6 +1142,16 @@ func (r *LinuxNIReconciler) getSubgraphState(intSG, currSG dg.GraphR, forApp boo
return true
}
}
if tcIngress, isTcIngress := item.(linux.TCIngress); isTcIngress {
if tcIngress.NetIf.ItemRef.ItemType == linux.VIFTypename {
return true
}
}
if tcMirror, isTcMirror := item.(linux.TCMirror); isTcMirror {
if tcMirror.FromNetIf.ItemRef.ItemType == linux.VIFTypename {
return true
}
}
return false
}
ignoreExtraItem := func(item dg.Item) bool {
Expand Down
14 changes: 14 additions & 0 deletions pkg/pillar/nireconciler/linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1438,6 +1438,8 @@ func TestIPv4LocalAndSwitchNIs(test *testing.T) {
t.Expect(niStatus.Deleted).To(BeFalse())
t.Expect(niStatus.InProgress).To(BeFalse())
t.Expect(niStatus.BrIfName).To(Equal("bn1"))
// Traffic mirroring for Local NI is not implemented.
t.Expect(niStatus.MirrorIfName).To(BeEmpty())
t.Expect(niStatus.FailedItems).To(BeEmpty())
t.Expect(niStatus.Routes).To(HaveLen(1))
t.Expect(niStatus.Routes[0].IsDefaultRoute()).To(BeTrue())
Expand Down Expand Up @@ -1532,6 +1534,7 @@ func TestIPv4LocalAndSwitchNIs(test *testing.T) {
t.Expect(niStatus.Deleted).To(BeFalse())
t.Expect(niStatus.InProgress).To(BeFalse())
t.Expect(niStatus.BrIfName).To(Equal("eth1"))
t.Expect(niStatus.MirrorIfName).To(Equal("eth1-m"))
t.Expect(niStatus.FailedItems).To(BeEmpty())

t.Eventually(updatesCh).Should(Receive(&recUpdate))
Expand All @@ -1544,6 +1547,10 @@ func TestIPv4LocalAndSwitchNIs(test *testing.T) {
genericitems.HTTPServer{
ListenIf: genericitems.NetworkIf{IfName: "eth1"}, Port: 80,
}))).To(BeFalse())
t.Expect(itemIsCreated(dg.Reference(
linuxitems.DummyIf{IfName: "eth1-m"}))).To(BeTrue())
t.Expect(itemCountWithType(linuxitems.TCIngressTypename)).To(Equal(1))
t.Expect(itemCountWithType(linuxitems.TCMirrorTypename)).To(Equal(4))

// Simulate eth1 getting an IP address.
eth1.IPAddrs = eth1IPs
Expand Down Expand Up @@ -1596,6 +1603,9 @@ func TestIPv4LocalAndSwitchNIs(test *testing.T) {
t.Expect(recUpdate.UpdateType).To(Equal(nirec.AppConnReconcileStatusChanged))
t.Expect(recUpdate.AppConnStatus.Equal(appStatus)).To(BeTrue())

t.Expect(itemCountWithType(linuxitems.TCIngressTypename)).To(Equal(1))
t.Expect(itemCountWithType(linuxitems.TCMirrorTypename)).To(Equal(4))

// Simulate domainmgr creating all VIFs.
networkMonitor.AddOrUpdateInterface(app2VIF1)
t.Eventually(updatesCh).Should(Receive(&recUpdate))
Expand All @@ -1618,6 +1628,10 @@ func TestIPv4LocalAndSwitchNIs(test *testing.T) {
t.Expect(recUpdate.AppConnStatus.VIFs[i].FailedItems).To(BeEmpty())
}

// 2 VIFs and one device port are connected to switch NI.
t.Expect(itemCountWithType(linuxitems.TCIngressTypename)).To(Equal(1 * 3))
t.Expect(itemCountWithType(linuxitems.TCMirrorTypename)).To(Equal(4 * 3))

ni1PortMapRule1 := iptables.Rule{
RuleLabel: "User-configured PORTMAP ACL rule 1 for port eth0 IP 192.168.10.5 from inside",
Table: "nat",
Expand Down
2 changes: 2 additions & 0 deletions pkg/pillar/nireconciler/linuxitems/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ func RegisterItems(log *base.LogObject, registry *reconciler.DefaultRegistry,
{c: &SysctlConfigurator{Log: log}, t: SysctlTypename},
{c: &VIFConfigurator{Log: log}, t: VIFTypename},
{c: &BPDUGuardConfigurator{Log: log}, t: BPDUGuardTypename},
{c: &TCIngressConfigurator{Log: log}, t: TCIngressTypename},
{c: &TCMirrorConfigurator{Log: log}, t: TCMirrorTypename},
}
for _, configurator := range configurators {
err := registry.Register(configurator.c, configurator.t)
Expand Down
Loading

0 comments on commit cdc6a49

Please sign in to comment.