Skip to content

Commit 71d8c47

Browse files
committed
netfilter: conntrack: introduce clash resolution on insertion race
This patch introduces nf_ct_resolve_clash() to resolve a race condition on conntrack insertions. This is particularly a problem for connection-less protocols such as UDP, with no initial handshake. Two or more packets may race to insert the entry, resulting in packet drops. Another problematic scenario is packets enqueued to userspace via NFQUEUE after the raw table, which makes it easier to trigger this race. To resolve this, the idea is to reset the conntrack entry to the one that won the race. Packet and byte counters are also merged. The 'insert_failed' stat still accounts for this situation; after this patch, the drop counter is bumped whenever we drop packets, so we can watch for unresolved clashes. Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
1 parent ba76738 commit 71d8c47

File tree

4 files changed

+57
-3
lines changed

4 files changed

+57
-3
lines changed

include/net/netfilter/nf_conntrack_l4proto.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ struct nf_conntrack_l4proto {
2323
/* L4 Protocol number. */
2424
u_int8_t l4proto;
2525

26+
/* Resolve clashes on insertion races. */
27+
bool allow_clash;
28+
2629
/* Try to fill in the third arg: dataoff is offset past network protocol
2730
hdr. Return true if possible. */
2831
bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff,

net/netfilter/nf_conntrack_core.c

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,48 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
617617
}
618618
}
619619

620+
static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
621+
const struct nf_conn *loser_ct)
622+
{
623+
struct nf_conn_acct *acct;
624+
625+
acct = nf_conn_acct_find(loser_ct);
626+
if (acct) {
627+
struct nf_conn_counter *counter = acct->counter;
628+
enum ip_conntrack_info ctinfo;
629+
unsigned int bytes;
630+
631+
/* u32 should be fine since we must have seen one packet. */
632+
bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
633+
nf_ct_acct_update(ct, ctinfo, bytes);
634+
}
635+
}
636+
637+
/* Resolve race on insertion if this protocol allows this. */
638+
static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
639+
enum ip_conntrack_info ctinfo,
640+
struct nf_conntrack_tuple_hash *h)
641+
{
642+
/* This is the conntrack entry already in hashes that won race. */
643+
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
644+
struct nf_conntrack_l4proto *l4proto;
645+
646+
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
647+
if (l4proto->allow_clash &&
648+
!nf_ct_is_dying(ct) &&
649+
atomic_inc_not_zero(&ct->ct_general.use)) {
650+
nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
651+
nf_conntrack_put(skb->nfct);
652+
/* Assign conntrack already in hashes to this skbuff. Don't
653+
* modify skb->nfctinfo to ensure consistent stateful filtering.
654+
*/
655+
skb->nfct = &ct->ct_general;
656+
return NF_ACCEPT;
657+
}
658+
NF_CT_STAT_INC(net, drop);
659+
return NF_DROP;
660+
}
661+
620662
/* Confirm a connection given skb; places it in hash table */
621663
int
622664
__nf_conntrack_confirm(struct sk_buff *skb)
@@ -631,6 +673,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
631673
enum ip_conntrack_info ctinfo;
632674
struct net *net;
633675
unsigned int sequence;
676+
int ret = NF_DROP;
634677

635678
ct = nf_ct_get(skb, &ctinfo);
636679
net = nf_ct_net(ct);
@@ -673,8 +716,10 @@ __nf_conntrack_confirm(struct sk_buff *skb)
673716
*/
674717
nf_ct_del_from_dying_or_unconfirmed_list(ct);
675718

676-
if (unlikely(nf_ct_is_dying(ct)))
677-
goto out;
719+
if (unlikely(nf_ct_is_dying(ct))) {
720+
nf_ct_add_to_dying_list(ct);
721+
goto dying;
722+
}
678723

679724
/* See if there's one in the list already, including reverse:
680725
NAT could have grabbed it without realizing, since we're
@@ -725,10 +770,12 @@ __nf_conntrack_confirm(struct sk_buff *skb)
725770

726771
out:
727772
nf_ct_add_to_dying_list(ct);
773+
ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
774+
dying:
728775
nf_conntrack_double_unlock(hash, reply_hash);
729776
NF_CT_STAT_INC(net, insert_failed);
730777
local_bh_enable();
731-
return NF_DROP;
778+
return ret;
732779
}
733780
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
734781

net/netfilter/nf_conntrack_proto_udp.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
309309
.l3proto = PF_INET,
310310
.l4proto = IPPROTO_UDP,
311311
.name = "udp",
312+
.allow_clash = true,
312313
.pkt_to_tuple = udp_pkt_to_tuple,
313314
.invert_tuple = udp_invert_tuple,
314315
.print_tuple = udp_print_tuple,
@@ -341,6 +342,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
341342
.l3proto = PF_INET6,
342343
.l4proto = IPPROTO_UDP,
343344
.name = "udp",
345+
.allow_clash = true,
344346
.pkt_to_tuple = udp_pkt_to_tuple,
345347
.invert_tuple = udp_invert_tuple,
346348
.print_tuple = udp_print_tuple,

net/netfilter/nf_conntrack_proto_udplite.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
274274
.l3proto = PF_INET,
275275
.l4proto = IPPROTO_UDPLITE,
276276
.name = "udplite",
277+
.allow_clash = true,
277278
.pkt_to_tuple = udplite_pkt_to_tuple,
278279
.invert_tuple = udplite_invert_tuple,
279280
.print_tuple = udplite_print_tuple,
@@ -306,6 +307,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
306307
.l3proto = PF_INET6,
307308
.l4proto = IPPROTO_UDPLITE,
308309
.name = "udplite",
310+
.allow_clash = true,
309311
.pkt_to_tuple = udplite_pkt_to_tuple,
310312
.invert_tuple = udplite_invert_tuple,
311313
.print_tuple = udplite_print_tuple,

0 commit comments

Comments
 (0)