Skip to content

Commit

Permalink
nft_set_pipapo: Introduce AVX2-based lookup implementation
Browse files Browse the repository at this point in the history
If the AVX2 set is available, we can exploit the repetitive
characteristic of this algorithm to provide a fast, vectorised
version by using 256-bit wide AVX2 operations for bucket loads and
bitwise intersections.

In most cases, this implementation consistently outperforms rbtree
set instances despite the fact they are configured to use a given,
single, ranged data type out of the ones used for performance
measurements by the nft_concat_range.sh kselftest.

That script, injecting packets directly on the ingoing device path
with pktgen, reports, averaged over five runs on a single AMD Epyc
7402 thread (3.35GHz, 768 KiB L1D$, 12 MiB L2$), the figures below.
CONFIG_RETPOLINE was not set here.

Note that this is not a fair comparison over hash and rbtree set
types: non-ranged entries (used to have a reference for hash types)
would be matched faster than this, and matching on a single field
only (which is the case for rbtree) is also significantly faster.

However, it's not possible at the moment to choose this set type
for non-ranged entries, and the current implementation also needs
a few minor adjustments in order to match on less than two fields.

 ---------------.-----------------------------------.------------.
 AMD Epyc 7402  |          baselines, Mpps          | this patch |
  1 thread      |___________________________________|____________|
  3.35GHz       |        |        |        |        |            |
  768KiB L1D$   | netdev |  hash  | rbtree |        |            |
 ---------------|  hook  |   no   | single |        |   pipapo   |
 type   entries |  drop  | ranges | field  | pipapo |    AVX2    |
 ---------------|--------|--------|--------|--------|------------|
 net,port       |        |        |        |        |            |
          1000  |   19.0 |   10.4 |    3.8 |    4.0 | 7.5   +87% |
 ---------------|--------|--------|--------|--------|------------|
 port,net       |        |        |        |        |            |
           100  |   18.8 |   10.3 |    5.8 |    6.3 | 8.1   +29% |
 ---------------|--------|--------|--------|--------|------------|
 net6,port      |        |        |        |        |            |
          1000  |   16.4 |    7.6 |    1.8 |    2.1 | 4.8  +128% |
 ---------------|--------|--------|--------|--------|------------|
 port,proto     |        |        |        |        |            |
         30000  |   19.6 |   11.6 |    3.9 |    0.5 | 2.6  +420% |
 ---------------|--------|--------|--------|--------|------------|
 net6,port,mac  |        |        |        |        |            |
            10  |   16.5 |    5.4 |    4.3 |    3.4 | 4.7   +38% |
 ---------------|--------|--------|--------|--------|------------|
 net6,port,mac, |        |        |        |        |            |
 proto    1000  |   16.5 |    5.7 |    1.9 |    1.4 | 3.6   +26% |
 ---------------|--------|--------|--------|--------|------------|
 net,mac        |        |        |        |        |            |
          1000  |   19.0 |    8.4 |    3.9 |    2.5 | 6.4  +156% |
 ---------------'--------'--------'--------'--------'------------'

A similar strategy could be easily reused to implement specialised
versions for other SIMD sets, and I plan to post at least a NEON
version at a later time.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  • Loading branch information
sbrivio-rh authored and ummakynes committed Mar 15, 2020
1 parent 8683f4b commit 7400b06
Show file tree
Hide file tree
Showing 6 changed files with 1,270 additions and 0 deletions.
1 change: 1 addition & 0 deletions include/net/netfilter/nf_tables_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ extern const struct nft_set_type nft_set_hash_fast_type;
extern const struct nft_set_type nft_set_rbtree_type;
extern const struct nft_set_type nft_set_bitmap_type;
extern const struct nft_set_type nft_set_pipapo_type;
extern const struct nft_set_type nft_set_pipapo_avx2_type;

struct nft_expr;
struct nft_regs;
Expand Down
6 changes: 6 additions & 0 deletions net/netfilter/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \
nft_set_pipapo.o

ifdef CONFIG_X86_64
ifneq (,$(findstring -DCONFIG_AS_AVX2=1,$(KBUILD_CFLAGS)))
nf_tables-objs += nft_set_pipapo_avx2.o
endif
endif

obj-$(CONFIG_NF_TABLES) += nf_tables.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o
Expand Down
3 changes: 3 additions & 0 deletions net/netfilter/nf_tables_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -3272,6 +3272,9 @@ static const struct nft_set_type *nft_set_types[] = {
&nft_set_rhash_type,
&nft_set_bitmap_type,
&nft_set_rbtree_type,
#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2)
&nft_set_pipapo_avx2_type,
#endif
&nft_set_pipapo_type,
};

Expand Down
24 changes: 24 additions & 0 deletions net/netfilter/nft_set_pipapo.c
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@
#include <linux/bitmap.h>
#include <linux/bitops.h>

#include "nft_set_pipapo_avx2.h"
#include "nft_set_pipapo.h"

/* Current working bitmap index, toggled between field matches */
Expand Down Expand Up @@ -2174,3 +2175,26 @@ const struct nft_set_type nft_set_pipapo_type = {
.elemsize = offsetof(struct nft_pipapo_elem, ext),
},
};

#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2)
const struct nft_set_type nft_set_pipapo_avx2_type = {
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT |
NFT_SET_TIMEOUT,
.ops = {
.lookup = nft_pipapo_avx2_lookup,
.insert = nft_pipapo_insert,
.activate = nft_pipapo_activate,
.deactivate = nft_pipapo_deactivate,
.flush = nft_pipapo_flush,
.remove = nft_pipapo_remove,
.walk = nft_pipapo_walk,
.get = nft_pipapo_get,
.privsize = nft_pipapo_privsize,
.estimate = nft_pipapo_avx2_estimate,
.init = nft_pipapo_init,
.destroy = nft_pipapo_destroy,
.gc_init = nft_pipapo_gc_init,
.elemsize = offsetof(struct nft_pipapo_elem, ext),
},
};
#endif
Loading

0 comments on commit 7400b06

Please sign in to comment.