Skip to content

Commit

Permalink
Merge tag 'nf-24-02-29' of git://git.kernel.org/pub/scm/linux/kernel/…
Browse files Browse the repository at this point in the history
…git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

Patch #1 restores NFPROTO_INET with nft_compat, from Ignat Korchagin.

Patch #2 fixes an issue with bridge netfilter and broadcast/multicast
packets.

There is a day 0 bug in br_netfilter when used with connection tracking.

Conntrack assumes that an nf_conn structure that is not yet added to
hash table ("unconfirmed"), is only visible by the current cpu that is
processing the sk_buff.

For bridge this isn't true, sk_buff can get cloned in between, and
clones can be processed in parallel on different cpu.

This patch disables NAT and conntrack helpers for multicast packets.

Patch #3 adds a selftest to cover for the br_netfilter bug.

netfilter pull request 24-02-29

* tag 'nf-24-02-29' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  selftests: netfilter: add bridge conntrack + multicast test case
  netfilter: bridge: confirm multicast packets before passing them up the stack
  netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
====================

Link: https://lore.kernel.org/r/20240229000135.8780-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
  • Loading branch information
Paolo Abeni committed Feb 29, 2024
2 parents 51dd4ee + 6523cf5 commit b611b77
Show file tree
Hide file tree
Showing 7 changed files with 338 additions and 1 deletion.
1 change: 1 addition & 0 deletions include/linux/netfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ struct nf_ct_hook {
const struct sk_buff *);
void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
void (*set_closing)(struct nf_conntrack *nfct);
int (*confirm)(struct sk_buff *skb);
};
extern const struct nf_ct_hook __rcu *nf_ct_hook;

Expand Down
96 changes: 96 additions & 0 deletions net/bridge/br_netfilter_hooks.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@
#include <linux/sysctl.h>
#endif

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack_core.h>
#endif

static unsigned int brnf_net_id __read_mostly;

struct brnf_net {
Expand Down Expand Up @@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv,
return NF_STOLEN;
}

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* conntracks' nf_confirm logic cannot handle cloned skbs referencing
* the same nf_conn entry, which will happen for multicast (broadcast)
* Frames on bridges.
*
* Example:
* macvlan0
* br0
* ethX ethY
*
* ethX (or Y) receives multicast or broadcast packet containing
* an IP packet, not yet in conntrack table.
*
* 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
* -> skb->_nfct now references a unconfirmed entry
* 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
* interface.
* 3. skb gets passed up the stack.
* 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
* and schedules a work queue to send them out on the lower devices.
*
* The clone skb->_nfct is not a copy, it is the same entry as the
* original skb. The macvlan rx handler then returns RX_HANDLER_PASS.
* 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
*
* The Macvlan broadcast worker and normal confirm path will race.
*
* This race will not happen if step 2 already confirmed a clone. In that
* case later steps perform skb_clone() with skb->_nfct already confirmed (in
* hash table). This works fine.
*
* But such confirmation won't happen when eb/ip/nftables rules dropped the
* packets before they reached the nf_confirm step in postrouting.
*
* Work around this problem by explicit confirmation of the entry at
* LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
* entry.
*
*/
static unsigned int br_nf_local_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conntrack *nfct = skb_nfct(skb);
const struct nf_ct_hook *ct_hook;
struct nf_conn *ct;
int ret;

if (!nfct || skb->pkt_type == PACKET_HOST)
return NF_ACCEPT;

ct = container_of(nfct, struct nf_conn, ct_general);
if (likely(nf_ct_is_confirmed(ct)))
return NF_ACCEPT;

WARN_ON_ONCE(skb_shared(skb));
WARN_ON_ONCE(refcount_read(&nfct->use) != 1);

/* We can't call nf_confirm here, it would create a dependency
* on nf_conntrack module.
*/
ct_hook = rcu_dereference(nf_ct_hook);
if (!ct_hook) {
skb->_nfct = 0ul;
nf_conntrack_put(nfct);
return NF_ACCEPT;
}

nf_bridge_pull_encap_header(skb);
ret = ct_hook->confirm(skb);
switch (ret & NF_VERDICT_MASK) {
case NF_STOLEN:
return NF_STOLEN;
default:
nf_bridge_push_encap_header(skb);
break;
}

ct = container_of(nfct, struct nf_conn, ct_general);
WARN_ON_ONCE(!nf_ct_is_confirmed(ct));

return ret;
}
#endif

/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
Expand Down Expand Up @@ -964,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
},
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
{
.hook = br_nf_local_in,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_IN,
.priority = NF_BR_PRI_LAST,
},
#endif
{
.hook = br_nf_forward,
.pf = NFPROTO_BRIDGE,
Expand Down
30 changes: 30 additions & 0 deletions net/bridge/netfilter/nf_conntrack_bridge.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
return nf_conntrack_in(skb, &bridge_state);
}

static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;

if (skb->pkt_type == PACKET_HOST)
return NF_ACCEPT;

/* nf_conntrack_confirm() cannot handle concurrent clones,
* this happens for broad/multicast frames with e.g. macvlan on top
* of the bridge device.
*/
ct = nf_ct_get(skb, &ctinfo);
if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
return NF_ACCEPT;

/* let inet prerouting call conntrack again */
skb->_nfct = 0;
nf_ct_put(ct);

return NF_ACCEPT;
}

static void nf_ct_bridge_frag_save(struct sk_buff *skb,
struct nf_bridge_frag_data *data)
{
Expand Down Expand Up @@ -385,6 +409,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = nf_ct_bridge_in,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
{
.hook = nf_ct_bridge_post,
.pf = NFPROTO_BRIDGE,
Expand Down
1 change: 1 addition & 0 deletions net/netfilter/nf_conntrack_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2756,6 +2756,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
.get_tuple_skb = nf_conntrack_get_tuple_skb,
.attach = nf_conntrack_attach,
.set_closing = nf_conntrack_set_closing,
.confirm = __nf_conntrack_confirm,
};

void nf_conntrack_init_end(void)
Expand Down
20 changes: 20 additions & 0 deletions net/netfilter/nft_compat.c
Original file line number Diff line number Diff line change
Expand Up @@ -359,10 +359,20 @@ static int nft_target_validate(const struct nft_ctx *ctx,

if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
ctx->family != NFPROTO_INET &&
ctx->family != NFPROTO_BRIDGE &&
ctx->family != NFPROTO_ARP)
return -EOPNOTSUPP;

ret = nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_FORWARD) |
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING));
if (ret)
return ret;

if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
Expand Down Expand Up @@ -610,10 +620,20 @@ static int nft_match_validate(const struct nft_ctx *ctx,

if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
ctx->family != NFPROTO_INET &&
ctx->family != NFPROTO_BRIDGE &&
ctx->family != NFPROTO_ARP)
return -EOPNOTSUPP;

ret = nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_FORWARD) |
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING));
if (ret)
return ret;

if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
Expand Down
3 changes: 2 additions & 1 deletion tools/testing/selftests/netfilter/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
conntrack_sctp_collision.sh xt_string.sh
conntrack_sctp_collision.sh xt_string.sh \
bridge_netfilter.sh

HOSTPKG_CONFIG := pkg-config

Expand Down
Loading

0 comments on commit b611b77

Please sign in to comment.