@@ -37,6 +37,70 @@ void reuseport_has_conns_set(struct sock *sk)
 }
 EXPORT_SYMBOL(reuseport_has_conns_set);
 
+static void __reuseport_get_incoming_cpu(struct sock_reuseport *reuse)
+{
+	/* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */
+	WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu + 1);
+}
+
+static void __reuseport_put_incoming_cpu(struct sock_reuseport *reuse)
+{
+	/* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */
+	WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu - 1);
+}
+
+static void reuseport_get_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse)
+{
+	if (sk->sk_incoming_cpu >= 0)
+		__reuseport_get_incoming_cpu(reuse);
+}
+
+static void reuseport_put_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse)
+{
+	if (sk->sk_incoming_cpu >= 0)
+		__reuseport_put_incoming_cpu(reuse);
+}
+
+void reuseport_update_incoming_cpu(struct sock *sk, int val)
+{
+	struct sock_reuseport *reuse;
+	int old_sk_incoming_cpu;
+
+	if (unlikely(!rcu_access_pointer(sk->sk_reuseport_cb))) {
+		/* Paired with READ_ONCE() in sk_incoming_cpu_update()
+		 * and compute_score().
+		 */
+		WRITE_ONCE(sk->sk_incoming_cpu, val);
+		return;
+	}
+
+	spin_lock_bh(&reuseport_lock);
+
+	/* This must be done under reuseport_lock to avoid a race with
+	 * reuseport_grow(), which accesses sk->sk_incoming_cpu without
+	 * lock_sock() when detaching a shutdown()ed sk.
+	 *
+	 * Paired with READ_ONCE() in reuseport_select_sock_by_hash().
+	 */
+	old_sk_incoming_cpu = sk->sk_incoming_cpu;
+	WRITE_ONCE(sk->sk_incoming_cpu, val);
+
+	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+					  lockdep_is_held(&reuseport_lock));
+
+	/* reuseport_grow() has detached a closed sk. */
+	if (!reuse)
+		goto out;
+
+	if (old_sk_incoming_cpu < 0 && val >= 0)
+		__reuseport_get_incoming_cpu(reuse);
+	else if (old_sk_incoming_cpu >= 0 && val < 0)
+		__reuseport_put_incoming_cpu(reuse);
+
+out:
+	spin_unlock_bh(&reuseport_lock);
+}
+
 static int reuseport_sock_index(struct sock *sk,
 				const struct sock_reuseport *reuse,
 				bool closed)
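reuseport_update_incoming_cpu() is the entry point reached from the SO_INCOMING_CPU setsockopt() path. As a rough userspace sketch of how a per-CPU worker would exercise it, assuming a hypothetical make_worker_socket() helper and illustrative port/backlog values (thread pinning and most error handling elided):

```c
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

/* Create one SO_REUSEPORT listener associated with the given CPU, so
 * that reuseport_select_sock_by_hash() can steer flows arriving on
 * that CPU to this socket.  Returns the fd, or -1 on error.
 */
static int make_worker_socket(int cpu, unsigned short port)
{
	struct sockaddr_in addr;
	int one = 1;
	int fd;

	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;

	/* All workers share the same port. */
	setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));

	/* This lands in reuseport_update_incoming_cpu() once the
	 * socket has joined a reuseport group.
	 */
	setsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, sizeof(cpu));

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	addr.sin_port = htons(port);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(fd, 128) < 0) {
		close(fd);
		return -1;
	}

	return fd;
}
```

Before this patch, a value set this way had no effect on which group member the hash-based selection picked; with the counter above, setting SO_INCOMING_CPU on any member flips the whole group onto the CPU-aware path.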
@@ -64,6 +128,7 @@ static void __reuseport_add_sock(struct sock *sk,
 	/* paired with smp_rmb() in reuseport_(select|migrate)_sock() */
 	smp_wmb();
 	reuse->num_socks++;
+	reuseport_get_incoming_cpu(sk, reuse);
 }
 
 static bool __reuseport_detach_sock(struct sock *sk,
@@ -76,6 +141,7 @@ static bool __reuseport_detach_sock(struct sock *sk,
 
 	reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
 	reuse->num_socks--;
+	reuseport_put_incoming_cpu(sk, reuse);
 
 	return true;
 }
@@ -86,6 +152,7 @@ static void __reuseport_add_closed_sock(struct sock *sk,
 	reuse->socks[reuse->max_socks - reuse->num_closed_socks - 1] = sk;
 	/* paired with READ_ONCE() in inet_csk_bind_conflict() */
 	WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks + 1);
+	reuseport_get_incoming_cpu(sk, reuse);
 }
 
 static bool __reuseport_detach_closed_sock(struct sock *sk,
@@ -99,6 +166,7 @@ static bool __reuseport_detach_closed_sock(struct sock *sk,
 	reuse->socks[i] = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
 	/* paired with READ_ONCE() in inet_csk_bind_conflict() */
 	WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks - 1);
+	reuseport_put_incoming_cpu(sk, reuse);
 
 	return true;
 }
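The four call sites above (add and detach, for both live and closed sockets) keep one invariant: reuse->incoming_cpu always equals the number of group members whose sk_incoming_cpu is >= 0, so the selection fast path only has to test a single counter. A toy, purely illustrative userspace model of that pairing (all names hypothetical):

```c
#include <assert.h>

/* Stand-ins for the kernel structures: cpus[] holds each member's
 * sk_incoming_cpu (-1 when unset); incoming_cpu mirrors the
 * reuse->incoming_cpu counter.
 */
struct toy_group {
	int cpus[8];
	int num;
	int incoming_cpu;	/* count of members with cpus[i] >= 0 */
};

static void toy_add(struct toy_group *g, int cpu)
{
	g->cpus[g->num++] = cpu;
	if (cpu >= 0)
		g->incoming_cpu++;	/* reuseport_get_incoming_cpu() */
}

static void toy_detach(struct toy_group *g, int i)
{
	int cpu = g->cpus[i];

	g->cpus[i] = g->cpus[--g->num];
	if (cpu >= 0)
		g->incoming_cpu--;	/* reuseport_put_incoming_cpu() */
}

int main(void)
{
	struct toy_group g = { .num = 0, .incoming_cpu = 0 };

	toy_add(&g, -1);	/* SO_INCOMING_CPU never set */
	toy_add(&g, 2);		/* associated with CPU 2 */
	assert(g.incoming_cpu == 1);

	toy_detach(&g, 1);
	assert(g.incoming_cpu == 0);	/* fast path is cheap again */
	return 0;
}
```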
@@ -166,6 +234,7 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
 	reuse->bind_inany = bind_inany;
 	reuse->socks[0] = sk;
 	reuse->num_socks = 1;
+	reuseport_get_incoming_cpu(sk, reuse);
 	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
 
 out:
@@ -209,6 +278,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 	more_reuse->reuseport_id = reuse->reuseport_id;
 	more_reuse->bind_inany = reuse->bind_inany;
 	more_reuse->has_conns = reuse->has_conns;
+	more_reuse->incoming_cpu = reuse->incoming_cpu;
 
 	memcpy(more_reuse->socks, reuse->socks,
 	       reuse->num_socks * sizeof(struct sock *));
@@ -458,18 +528,32 @@ static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
 static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse,
 						  u32 hash, u16 num_socks)
 {
+	struct sock *first_valid_sk = NULL;
 	int i, j;
 
 	i = j = reciprocal_scale(hash, num_socks);
-	while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+	do {
+		struct sock *sk = reuse->socks[i];
+
+		if (sk->sk_state != TCP_ESTABLISHED) {
+			/* Paired with WRITE_ONCE() in __reuseport_(get|put)_incoming_cpu(). */
+			if (!READ_ONCE(reuse->incoming_cpu))
+				return sk;
+
+			/* Paired with WRITE_ONCE() in reuseport_update_incoming_cpu(). */
+			if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
+				return sk;
+
+			if (!first_valid_sk)
+				first_valid_sk = sk;
+		}
+
 		i++;
 		if (i >= num_socks)
 			i = 0;
-		if (i == j)
-			return NULL;
-	}
+	} while (i != j);
 
-	return reuse->socks[i];
+	return first_valid_sk;
 }
 
 /**
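The rewritten loop is a single pass with a two-level preference: when no member has set SO_INCOMING_CPU the first non-established socket wins immediately (the old behaviour), otherwise a socket pinned to the current CPU is preferred and the first valid socket is kept as a fallback. A small userspace model of the same selection, with hypothetical names and a fixed current-CPU argument standing in for raw_smp_processor_id():

```c
/* Model of reuseport_select_sock_by_hash(): state[] mimics sk_state
 * (nonzero = established, skip), cpu[] mimics sk_incoming_cpu,
 * start plays the role of reciprocal_scale(hash, num), and
 * group_cnt the reuse->incoming_cpu counter.
 */
int toy_select(const int *state, const int *cpu, int num,
	       int start, int this_cpu, int group_cnt)
{
	int first_valid = -1;
	int i = start;

	do {
		if (!state[i]) {
			/* Nobody set SO_INCOMING_CPU: old behaviour. */
			if (!group_cnt)
				return i;

			/* Prefer a socket pinned to this CPU. */
			if (cpu[i] == this_cpu)
				return i;

			if (first_valid < 0)
				first_valid = i;
		}

		if (++i >= num)
			i = 0;
	} while (i != start);

	/* -1 only if every socket was established. */
	return first_valid;
}
```

Note the fallback mirrors the patch: when the counter is nonzero but no socket matches the current CPU, the first valid socket is returned rather than NULL, so setting SO_INCOMING_CPU on only a subset of workers never drops connections.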