@@ -1192,6 +1192,45 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
11921192 unparsed -> vx_flags &= ~VXLAN_GBP_USED_BITS ;
11931193}
11941194
1195+ static bool vxlan_parse_gpe_hdr (struct vxlanhdr * unparsed ,
1196+ __be32 * protocol ,
1197+ struct sk_buff * skb , u32 vxflags )
1198+ {
1199+ struct vxlanhdr_gpe * gpe = (struct vxlanhdr_gpe * )unparsed ;
1200+
1201+ /* Need to have Next Protocol set for interfaces in GPE mode. */
1202+ if (!gpe -> np_applied )
1203+ return false;
1204+ /* "The initial version is 0. If a receiver does not support the
1205+ * version indicated it MUST drop the packet.
1206+ */
1207+ if (gpe -> version != 0 )
1208+ return false;
1209+ /* "When the O bit is set to 1, the packet is an OAM packet and OAM
1210+ * processing MUST occur." However, we don't implement OAM
1211+ * processing, thus drop the packet.
1212+ */
1213+ if (gpe -> oam_flag )
1214+ return false;
1215+
1216+ switch (gpe -> next_protocol ) {
1217+ case VXLAN_GPE_NP_IPV4 :
1218+ * protocol = htons (ETH_P_IP );
1219+ break ;
1220+ case VXLAN_GPE_NP_IPV6 :
1221+ * protocol = htons (ETH_P_IPV6 );
1222+ break ;
1223+ case VXLAN_GPE_NP_ETHERNET :
1224+ * protocol = htons (ETH_P_TEB );
1225+ break ;
1226+ default :
1227+ return false;
1228+ }
1229+
1230+ unparsed -> vx_flags &= ~VXLAN_GPE_USED_BITS ;
1231+ return true;
1232+ }
1233+
11951234static bool vxlan_set_mac (struct vxlan_dev * vxlan ,
11961235 struct vxlan_sock * vs ,
11971236 struct sk_buff * skb )
@@ -1257,9 +1296,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
12571296 struct vxlanhdr unparsed ;
12581297 struct vxlan_metadata _md ;
12591298 struct vxlan_metadata * md = & _md ;
1299+ __be32 protocol = htons (ETH_P_TEB );
1300+ bool raw_proto = false;
12601301 void * oiph ;
12611302
1262- /* Need Vxlan and inner Ethernet header to be present */
1303+ /* Need UDP and VXLAN header to be present */
12631304 if (!pskb_may_pull (skb , VXLAN_HLEN ))
12641305 return 1 ;
12651306
@@ -1283,9 +1324,18 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
12831324 if (!vxlan )
12841325 goto drop ;
12851326
1286- if (iptunnel_pull_header (skb , VXLAN_HLEN , htons (ETH_P_TEB ),
1287- !net_eq (vxlan -> net , dev_net (vxlan -> dev ))))
1288- goto drop ;
1327+ /* For backwards compatibility, only allow reserved fields to be
1328+ * used by VXLAN extensions if explicitly requested.
1329+ */
1330+ if (vs -> flags & VXLAN_F_GPE ) {
1331+ if (!vxlan_parse_gpe_hdr (& unparsed , & protocol , skb , vs -> flags ))
1332+ goto drop ;
1333+ raw_proto = true;
1334+ }
1335+
1336+ if (__iptunnel_pull_header (skb , VXLAN_HLEN , protocol , raw_proto ,
1337+ !net_eq (vxlan -> net , dev_net (vxlan -> dev ))))
1338+ goto drop ;
12891339
12901340 if (vxlan_collect_metadata (vs )) {
12911341 __be32 vni = vxlan_vni (vxlan_hdr (skb )-> vx_vni );
@@ -1304,14 +1354,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
13041354 memset (md , 0 , sizeof (* md ));
13051355 }
13061356
1307- /* For backwards compatibility, only allow reserved fields to be
1308- * used by VXLAN extensions if explicitly requested.
1309- */
13101357 if (vs -> flags & VXLAN_F_REMCSUM_RX )
13111358 if (!vxlan_remcsum (& unparsed , skb , vs -> flags ))
13121359 goto drop ;
13131360 if (vs -> flags & VXLAN_F_GBP )
13141361 vxlan_parse_gbp_hdr (& unparsed , skb , vs -> flags , md );
1362+ /* Note that GBP and GPE can never be active together. This is
1363+ * ensured in vxlan_dev_configure.
1364+ */
13151365
13161366 if (unparsed .vx_flags || unparsed .vx_vni ) {
13171367 /* If there are any unprocessed flags remaining treat
@@ -1325,8 +1375,13 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
13251375 goto drop ;
13261376 }
13271377
1328- if (!vxlan_set_mac (vxlan , vs , skb ))
1329- goto drop ;
1378+ if (!raw_proto ) {
1379+ if (!vxlan_set_mac (vxlan , vs , skb ))
1380+ goto drop ;
1381+ } else {
1382+ skb -> dev = vxlan -> dev ;
1383+ skb -> pkt_type = PACKET_HOST ;
1384+ }
13301385
13311386 oiph = skb_network_header (skb );
13321387 skb_reset_network_header (skb );
@@ -1685,6 +1740,27 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
16851740 gbp -> policy_id = htons (md -> gbp & VXLAN_GBP_ID_MASK );
16861741}
16871742
1743+ static int vxlan_build_gpe_hdr (struct vxlanhdr * vxh , u32 vxflags ,
1744+ __be16 protocol )
1745+ {
1746+ struct vxlanhdr_gpe * gpe = (struct vxlanhdr_gpe * )vxh ;
1747+
1748+ gpe -> np_applied = 1 ;
1749+
1750+ switch (protocol ) {
1751+ case htons (ETH_P_IP ):
1752+ gpe -> next_protocol = VXLAN_GPE_NP_IPV4 ;
1753+ return 0 ;
1754+ case htons (ETH_P_IPV6 ):
1755+ gpe -> next_protocol = VXLAN_GPE_NP_IPV6 ;
1756+ return 0 ;
1757+ case htons (ETH_P_TEB ):
1758+ gpe -> next_protocol = VXLAN_GPE_NP_ETHERNET ;
1759+ return 0 ;
1760+ }
1761+ return - EPFNOSUPPORT ;
1762+ }
1763+
16881764static int vxlan_build_skb (struct sk_buff * skb , struct dst_entry * dst ,
16891765 int iphdr_len , __be32 vni ,
16901766 struct vxlan_metadata * md , u32 vxflags ,
@@ -1694,6 +1770,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
16941770 int min_headroom ;
16951771 int err ;
16961772 int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL ;
1773+ __be16 inner_protocol = htons (ETH_P_TEB );
16971774
16981775 if ((vxflags & VXLAN_F_REMCSUM_TX ) &&
16991776 skb -> ip_summed == CHECKSUM_PARTIAL ) {
@@ -1712,10 +1789,8 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
17121789
17131790 /* Need space for new headers (invalidates iph ptr) */
17141791 err = skb_cow_head (skb , min_headroom );
1715- if (unlikely (err )) {
1716- kfree_skb (skb );
1717- return err ;
1718- }
1792+ if (unlikely (err ))
1793+ goto out_free ;
17191794
17201795 skb = vlan_hwaccel_push_inside (skb );
17211796 if (WARN_ON (!skb ))
@@ -1744,9 +1819,19 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
17441819
17451820 if (vxflags & VXLAN_F_GBP )
17461821 vxlan_build_gbp_hdr (vxh , vxflags , md );
1822+ if (vxflags & VXLAN_F_GPE ) {
1823+ err = vxlan_build_gpe_hdr (vxh , vxflags , skb -> protocol );
1824+ if (err < 0 )
1825+ goto out_free ;
1826+ inner_protocol = skb -> protocol ;
1827+ }
17471828
1748- skb_set_inner_protocol (skb , htons ( ETH_P_TEB ) );
1829+ skb_set_inner_protocol (skb , inner_protocol );
17491830 return 0 ;
1831+
1832+ out_free :
1833+ kfree_skb (skb );
1834+ return err ;
17501835}
17511836
17521837static struct rtable * vxlan_get_route (struct vxlan_dev * vxlan ,
@@ -2421,6 +2506,17 @@ static const struct net_device_ops vxlan_netdev_ether_ops = {
24212506 .ndo_fill_metadata_dst = vxlan_fill_metadata_dst ,
24222507};
24232508
2509+ static const struct net_device_ops vxlan_netdev_raw_ops = {
2510+ .ndo_init = vxlan_init ,
2511+ .ndo_uninit = vxlan_uninit ,
2512+ .ndo_open = vxlan_open ,
2513+ .ndo_stop = vxlan_stop ,
2514+ .ndo_start_xmit = vxlan_xmit ,
2515+ .ndo_get_stats64 = ip_tunnel_get_stats64 ,
2516+ .ndo_change_mtu = vxlan_change_mtu ,
2517+ .ndo_fill_metadata_dst = vxlan_fill_metadata_dst ,
2518+ };
2519+
24242520/* Info for udev, that this is a virtual tunnel endpoint */
24252521static struct device_type vxlan_type = {
24262522 .name = "vxlan" ,
@@ -2500,6 +2596,17 @@ static void vxlan_ether_setup(struct net_device *dev)
25002596 dev -> netdev_ops = & vxlan_netdev_ether_ops ;
25012597}
25022598
2599+ static void vxlan_raw_setup (struct net_device * dev )
2600+ {
2601+ dev -> type = ARPHRD_NONE ;
2602+ dev -> hard_header_len = 0 ;
2603+ dev -> addr_len = 0 ;
2604+ dev -> mtu = ETH_DATA_LEN ;
2605+ dev -> tx_queue_len = 1000 ;
2606+ dev -> flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST ;
2607+ dev -> netdev_ops = & vxlan_netdev_raw_ops ;
2608+ }
2609+
25032610static const struct nla_policy vxlan_policy [IFLA_VXLAN_MAX + 1 ] = {
25042611 [IFLA_VXLAN_ID ] = { .type = NLA_U32 },
25052612 [IFLA_VXLAN_GROUP ] = { .len = FIELD_SIZEOF (struct iphdr , daddr ) },
@@ -2526,6 +2633,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
25262633 [IFLA_VXLAN_REMCSUM_TX ] = { .type = NLA_U8 },
25272634 [IFLA_VXLAN_REMCSUM_RX ] = { .type = NLA_U8 },
25282635 [IFLA_VXLAN_GBP ] = { .type = NLA_FLAG , },
2636+ [IFLA_VXLAN_GPE ] = { .type = NLA_FLAG , },
25292637 [IFLA_VXLAN_REMCSUM_NOPARTIAL ] = { .type = NLA_FLAG },
25302638};
25312639
@@ -2726,7 +2834,20 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
27262834 __be16 default_port = vxlan -> cfg .dst_port ;
27272835 struct net_device * lowerdev = NULL ;
27282836
2729- vxlan_ether_setup (dev );
2837+ if (conf -> flags & VXLAN_F_GPE ) {
2838+ if (conf -> flags & ~VXLAN_F_ALLOWED_GPE )
2839+ return - EINVAL ;
2840+ /* For now, allow GPE only together with COLLECT_METADATA.
2841+ * This can be relaxed later; in such case, the other side
2842+ * of the PtP link will have to be provided.
2843+ */
2844+ if (!(conf -> flags & VXLAN_F_COLLECT_METADATA ))
2845+ return - EINVAL ;
2846+
2847+ vxlan_raw_setup (dev );
2848+ } else {
2849+ vxlan_ether_setup (dev );
2850+ }
27302851
27312852 vxlan -> net = src_net ;
27322853
@@ -2789,8 +2910,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
27892910 dev -> needed_headroom = needed_headroom ;
27902911
27912912 memcpy (& vxlan -> cfg , conf , sizeof (* conf ));
2792- if (!vxlan -> cfg .dst_port )
2793- vxlan -> cfg .dst_port = default_port ;
2913+ if (!vxlan -> cfg .dst_port ) {
2914+ if (conf -> flags & VXLAN_F_GPE )
2915+ vxlan -> cfg .dst_port = 4790 ; /* IANA assigned VXLAN-GPE port */
2916+ else
2917+ vxlan -> cfg .dst_port = default_port ;
2918+ }
27942919 vxlan -> flags |= conf -> flags ;
27952920
27962921 if (!vxlan -> cfg .age_interval )
@@ -2961,6 +3086,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
29613086 if (data [IFLA_VXLAN_GBP ])
29623087 conf .flags |= VXLAN_F_GBP ;
29633088
3089+ if (data [IFLA_VXLAN_GPE ])
3090+ conf .flags |= VXLAN_F_GPE ;
3091+
29643092 if (data [IFLA_VXLAN_REMCSUM_NOPARTIAL ])
29653093 conf .flags |= VXLAN_F_REMCSUM_NOPARTIAL ;
29663094
@@ -2977,6 +3105,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
29773105 case - EEXIST :
29783106 pr_info ("duplicate VNI %u\n" , be32_to_cpu (conf .vni ));
29793107 break ;
3108+
3109+ case - EINVAL :
3110+ pr_info ("unsupported combination of extensions\n" );
3111+ break ;
29803112 }
29813113
29823114 return err ;
@@ -3104,6 +3236,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
31043236 nla_put_flag (skb , IFLA_VXLAN_GBP ))
31053237 goto nla_put_failure ;
31063238
3239+ if (vxlan -> flags & VXLAN_F_GPE &&
3240+ nla_put_flag (skb , IFLA_VXLAN_GPE ))
3241+ goto nla_put_failure ;
3242+
31073243 if (vxlan -> flags & VXLAN_F_REMCSUM_NOPARTIAL &&
31083244 nla_put_flag (skb , IFLA_VXLAN_REMCSUM_NOPARTIAL ))
31093245 goto nla_put_failure ;
0 commit comments