Skip to content

Commit e15962a

Browse files
committed
Merge branch 'ipmr-ip6mr-allow-mc-routing-locally-generated-mc-packets'
Petr Machata says: ==================== ipmr, ip6mr: Allow MC-routing locally-generated MC packets Multicast routing is handled today in the input path. Locally generated MC packets don't hit the IPMR code. Thus if a VXLAN remote address is multicast, the driver needs to set an OIF during route lookup. In practice that means that MC routing configuration needs to be kept in sync with the VXLAN FDB and MDB. Ideally, the VXLAN packets would be routed by the MC routing code instead. To that end, this patchset adds support for routing locally generated multicast packets. However, an installation that uses a VXLAN underlay netdevice for which it also has matching MC routes would get a different routing with this patchset. Previously, the MC packets would be delivered directly to the underlay port, whereas now they would be MC-routed. In order to avoid this change in behavior, introduce an IPCB/IP6CB flag. Unless the flag is set, the new MC-routing code is skipped. All this is keyed to a new VXLAN attribute, IFLA_VXLAN_MC_ROUTE. Only when it is set does any of the above engage. In addition to that, and as is the case today with MC forwarding, IPV4_DEVCONF_MC_FORWARDING must be enabled for the netdevice that acts as a source of MC traffic (i.e. the VXLAN PHYS_DEV), so an MC daemon must be attached to the netdevice. When a VXLAN netdevice with an MC remote is brought up, the physical netdevice joins the indicated MC group. This is important for local delivery of MC packets, so it is still necessary to configure a physical netdevice -- the parameter cannot go away. The netdevice would however typically not be a front panel port, but a dummy. An MC daemon would then sit on top of that netdevice as well as any front panel ports that it needs to service, and have routes set up between the two. 
A way to configure the VXLAN netdevice to take advantage of the new MC routing would be:

    # ip link add name d up type dummy
    # ip link add name vx10 up type vxlan id 1000 dstport 4789 \
        local 192.0.2.1 group 225.0.0.1 ttl 16 dev d mcroute
    # ip link set dev vx10 master br # plus vlans etc.

With the following MC routes:

    (192.0.2.1, 225.0.0.1) iif=d oil=swp1,swp2 # TX route
    (*, 225.0.0.1) iif=swp1 oil=d,swp2 # RX route
    (*, 225.0.0.1) iif=swp2 oil=d,swp1 # RX route

The RX path has not changed, with the exception of an extra MC hop. Packets are delivered to the front panel port and MC-forwarded to the VXLAN physical port, here "d". Since the port has joined the multicast group, the packets are locally delivered, and end up being processed by the VXLAN netdevice.

This patchset is based on earlier patches from Nikolay Aleksandrov and Roopa Prabhu, though it underwent significant changes. Roopa broadly presented the topic at LPC 2019 [0].

Patchset progression:
- Patches #1 to #4 add ip_mr_output()
- Patches #5 to #10 add ip6_mr_output()
- Patch #11 adds the VXLAN bits to enable MR engagement
- Patches #12 to #14 prepare selftest libraries
- Patch #15 includes a new test suite

[0] https://www.youtube.com/watch?v=xlReECfi-uo
====================
Link: https://patch.msgid.link/cover.1750113335.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents ccde408 + e318037 commit e15962a

File tree

33 files changed

+1226
-122
lines changed

33 files changed

+1226
-122
lines changed

drivers/net/amt.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,7 +1046,8 @@ static bool amt_send_membership_update(struct amt_dev *amt,
10461046
amt->gw_port,
10471047
amt->relay_port,
10481048
false,
1049-
false);
1049+
false,
1050+
0);
10501051
amt_update_gw_status(amt, AMT_STATUS_SENT_UPDATE, true);
10511052
return false;
10521053
}
@@ -1103,7 +1104,8 @@ static void amt_send_multicast_data(struct amt_dev *amt,
11031104
amt->relay_port,
11041105
tunnel->source_port,
11051106
false,
1106-
false);
1107+
false,
1108+
0);
11071109
}
11081110

11091111
static bool amt_send_membership_query(struct amt_dev *amt,
@@ -1161,7 +1163,8 @@ static bool amt_send_membership_query(struct amt_dev *amt,
11611163
amt->relay_port,
11621164
tunnel->source_port,
11631165
false,
1164-
false);
1166+
false,
1167+
0);
11651168
amt_update_relay_status(tunnel, AMT_STATUS_SENT_QUERY, true);
11661169
return false;
11671170
}

drivers/net/bareudp.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -362,8 +362,8 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev,
362362
udp_tunnel_xmit_skb(rt, sock->sk, skb, saddr, info->key.u.ipv4.dst,
363363
tos, ttl, df, sport, bareudp->port,
364364
!net_eq(bareudp->net, dev_net(bareudp->dev)),
365-
!test_bit(IP_TUNNEL_CSUM_BIT,
366-
info->key.tun_flags));
365+
!test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags),
366+
0);
367367
return 0;
368368

369369
free_dst:
@@ -431,7 +431,8 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
431431
&saddr, &daddr, prio, ttl,
432432
info->key.label, sport, bareudp->port,
433433
!test_bit(IP_TUNNEL_CSUM_BIT,
434-
info->key.tun_flags));
434+
info->key.tun_flags),
435+
0);
435436
return 0;
436437

437438
free_dst:

drivers/net/geneve.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -921,8 +921,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
921921
udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst,
922922
tos, ttl, df, sport, geneve->cfg.info.key.tp_dst,
923923
!net_eq(geneve->net, dev_net(geneve->dev)),
924-
!test_bit(IP_TUNNEL_CSUM_BIT,
925-
info->key.tun_flags));
924+
!test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags),
925+
0);
926926
return 0;
927927
}
928928

@@ -1014,7 +1014,8 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
10141014
&saddr, &key->u.ipv6.dst, prio, ttl,
10151015
info->key.label, sport, geneve->cfg.info.key.tp_dst,
10161016
!test_bit(IP_TUNNEL_CSUM_BIT,
1017-
info->key.tun_flags));
1017+
info->key.tun_flags),
1018+
0);
10181019
return 0;
10191020
}
10201021
#endif

drivers/net/gtp.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,8 @@ static int gtp0_send_echo_resp_ip(struct gtp_dev *gtp, struct sk_buff *skb)
446446
htons(GTP0_PORT), htons(GTP0_PORT),
447447
!net_eq(sock_net(gtp->sk1u),
448448
dev_net(gtp->dev)),
449-
false);
449+
false,
450+
0);
450451

451452
return 0;
452453
}
@@ -704,7 +705,8 @@ static int gtp1u_send_echo_resp(struct gtp_dev *gtp, struct sk_buff *skb)
704705
htons(GTP1U_PORT), htons(GTP1U_PORT),
705706
!net_eq(sock_net(gtp->sk1u),
706707
dev_net(gtp->dev)),
707-
false);
708+
false,
709+
0);
708710
return 0;
709711
}
710712

@@ -1304,7 +1306,7 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
13041306
pktinfo.gtph_port, pktinfo.gtph_port,
13051307
!net_eq(sock_net(pktinfo.pctx->sk),
13061308
dev_net(dev)),
1307-
false);
1309+
false, 0);
13081310
break;
13091311
case AF_INET6:
13101312
#if IS_ENABLED(CONFIG_IPV6)
@@ -1314,7 +1316,7 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
13141316
ip6_dst_hoplimit(&pktinfo.rt->dst),
13151317
0,
13161318
pktinfo.gtph_port, pktinfo.gtph_port,
1317-
false);
1319+
false, 0);
13181320
#else
13191321
goto tx_err;
13201322
#endif
@@ -2405,7 +2407,7 @@ static int gtp_genl_send_echo_req(struct sk_buff *skb, struct genl_info *info)
24052407
port, port,
24062408
!net_eq(sock_net(sk),
24072409
dev_net(gtp->dev)),
2408-
false);
2410+
false, 0);
24092411
return 0;
24102412
}
24112413

drivers/net/ovpn/udp.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ static int ovpn_udp4_output(struct ovpn_peer *peer, struct ovpn_bind *bind,
199199
transmit:
200200
udp_tunnel_xmit_skb(rt, sk, skb, fl.saddr, fl.daddr, 0,
201201
ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport,
202-
fl.fl4_dport, false, sk->sk_no_check_tx);
202+
fl.fl4_dport, false, sk->sk_no_check_tx, 0);
203203
ret = 0;
204204
err:
205205
local_bh_enable();
@@ -274,7 +274,7 @@ static int ovpn_udp6_output(struct ovpn_peer *peer, struct ovpn_bind *bind,
274274
skb->ignore_df = 1;
275275
udp_tunnel6_xmit_skb(dst, sk, skb, skb->dev, &fl.saddr, &fl.daddr, 0,
276276
ip6_dst_hoplimit(dst), 0, fl.fl6_sport,
277-
fl.fl6_dport, udp_get_no_check6_tx(sk));
277+
fl.fl6_dport, udp_get_no_check6_tx(sk), 0);
278278
ret = 0;
279279
err:
280280
local_bh_enable();

drivers/net/vxlan/vxlan_core.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2451,6 +2451,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
24512451
rcu_read_lock();
24522452
if (addr_family == AF_INET) {
24532453
struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
2454+
u16 ipcb_flags = 0;
24542455
struct rtable *rt;
24552456
__be16 df = 0;
24562457
__be32 saddr;
@@ -2467,6 +2468,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
24672468
goto tx_error;
24682469
}
24692470

2471+
if (flags & VXLAN_F_MC_ROUTE)
2472+
ipcb_flags |= IPSKB_MCROUTE;
2473+
24702474
if (!info) {
24712475
/* Bypass encapsulation if the destination is local */
24722476
err = encap_bypass_if_local(skb, dev, vxlan, AF_INET,
@@ -2522,11 +2526,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
25222526

25232527
udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, saddr,
25242528
pkey->u.ipv4.dst, tos, ttl, df,
2525-
src_port, dst_port, xnet, !udp_sum);
2529+
src_port, dst_port, xnet, !udp_sum,
2530+
ipcb_flags);
25262531
#if IS_ENABLED(CONFIG_IPV6)
25272532
} else {
25282533
struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
25292534
struct in6_addr saddr;
2535+
u16 ip6cb_flags = 0;
25302536

25312537
if (!ifindex)
25322538
ifindex = sock6->sock->sk->sk_bound_dev_if;
@@ -2542,6 +2548,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
25422548
goto tx_error;
25432549
}
25442550

2551+
if (flags & VXLAN_F_MC_ROUTE)
2552+
ip6cb_flags |= IP6SKB_MCROUTE;
2553+
25452554
if (!info) {
25462555
u32 rt6i_flags = dst_rt6_info(ndst)->rt6i_flags;
25472556

@@ -2586,7 +2595,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
25862595

25872596
udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
25882597
&saddr, &pkey->u.ipv6.dst, tos, ttl,
2589-
pkey->label, src_port, dst_port, !udp_sum);
2598+
pkey->label, src_port, dst_port, !udp_sum,
2599+
ip6cb_flags);
25902600
#endif
25912601
}
25922602
vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len);
@@ -3401,6 +3411,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
34013411
[IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1),
34023412
[IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX),
34033413
[IFLA_VXLAN_RESERVED_BITS] = NLA_POLICY_EXACT_LEN(sizeof(struct vxlanhdr)),
3414+
[IFLA_VXLAN_MC_ROUTE] = NLA_POLICY_MAX(NLA_U8, 1),
34043415
};
34053416

34063417
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -4314,6 +4325,14 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
43144325
return err;
43154326
}
43164327

4328+
if (data[IFLA_VXLAN_MC_ROUTE]) {
4329+
err = vxlan_nl2flag(conf, data, IFLA_VXLAN_MC_ROUTE,
4330+
VXLAN_F_MC_ROUTE, changelink,
4331+
true, extack);
4332+
if (err)
4333+
return err;
4334+
}
4335+
43174336
if (tb[IFLA_MTU]) {
43184337
if (changelink) {
43194338
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],

drivers/net/wireguard/socket.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb,
8484
skb->ignore_df = 1;
8585
udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds,
8686
ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport,
87-
fl.fl4_dport, false, false);
87+
fl.fl4_dport, false, false, 0);
8888
goto out;
8989

9090
err:
@@ -151,7 +151,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb,
151151
skb->ignore_df = 1;
152152
udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds,
153153
ip6_dst_hoplimit(dst), 0, fl.fl6_sport,
154-
fl.fl6_dport, false);
154+
fl.fl6_dport, false, 0);
155155
goto out;
156156

157157
err:

include/linux/ipv6.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ struct inet6_skb_parm {
156156
#define IP6SKB_SEG6 256
157157
#define IP6SKB_FAKEJUMBO 512
158158
#define IP6SKB_MULTIPATH 1024
159+
#define IP6SKB_MCROUTE 2048
159160
};
160161

161162
#if defined(CONFIG_NET_L3_MASTER_DEV)

include/linux/mroute6.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ extern int ip6_mroute_getsockopt(struct sock *, int, sockptr_t, sockptr_t);
3131
extern int ip6_mr_input(struct sk_buff *skb);
3232
extern int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
3333
extern int ip6_mr_init(void);
34+
extern int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb);
3435
extern void ip6_mr_cleanup(void);
3536
int ip6mr_ioctl(struct sock *sk, int cmd, void *arg);
3637
#else
@@ -58,6 +59,12 @@ static inline int ip6_mr_init(void)
5859
return 0;
5960
}
6061

62+
static inline int
63+
ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
64+
{
65+
return ip6_output(net, sk, skb);
66+
}
67+
6168
static inline void ip6_mr_cleanup(void)
6269
{
6370
return;

include/net/ip.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ struct inet_skb_parm {
5959
#define IPSKB_L3SLAVE BIT(7)
6060
#define IPSKB_NOPOLICY BIT(8)
6161
#define IPSKB_MULTIPATH BIT(9)
62+
#define IPSKB_MCROUTE BIT(10)
6263

6364
u16 frag_max_size;
6465
};
@@ -167,6 +168,7 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
167168
int ip_local_deliver(struct sk_buff *skb);
168169
void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int proto);
169170
int ip_mr_input(struct sk_buff *skb);
171+
int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb);
170172
int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
171173
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb);
172174
int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,

0 commit comments

Comments
 (0)