Skip to content

Commit 76aed95

Browse files
q2ven authored and Paolo Abeni committed
rtnetlink: Add per-netns RTNL.
The goal is to break RTNL down into per-netns mutexes.

This patch adds a per-netns mutex and its helper functions, rtnl_net_lock() and rtnl_net_unlock(). rtnl_net_lock() acquires the global RTNL and the per-netns RTNL mutex, and rtnl_net_unlock() releases them.

We will replace 800+ rtnl_lock() calls with rtnl_net_lock() and finally remove rtnl_lock() from rtnl_net_lock().

When we need to nest per-netns RTNL mutexes, we will use __rtnl_net_lock(), and its locking order is defined by rtnl_net_lock_cmp_fn() as follows:

1. init_net is first
2. netns address ascending order

Note that the conversion will be done under CONFIG_DEBUG_NET_SMALL_RTNL with LOCKDEP so that we can carefully add the extra mutex without slowing down RTNL operations during conversion.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
1 parent ec763c2 commit 76aed95

File tree

5 files changed

+104
-0
lines changed

5 files changed

+104
-0
lines changed

include/linux/rtnetlink.h

+21
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,27 @@ static inline bool lockdep_rtnl_is_held(void)
9292
#define rcu_replace_pointer_rtnl(rp, p) \
9393
rcu_replace_pointer(rp, p, lockdep_rtnl_is_held())
9494

95+
#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
96+
void __rtnl_net_lock(struct net *net);
97+
void __rtnl_net_unlock(struct net *net);
98+
void rtnl_net_lock(struct net *net);
99+
void rtnl_net_unlock(struct net *net);
100+
int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
101+
#else
102+
static inline void __rtnl_net_lock(struct net *net) {}
103+
static inline void __rtnl_net_unlock(struct net *net) {}
104+
105+
/*
 * Fallback used when CONFIG_DEBUG_NET_SMALL_RTNL is not set: there is no
 * per-netns mutex in this configuration, so only the global RTNL is taken
 * and @net is ignored.
 */
static inline void rtnl_net_lock(struct net *net)
106+
{
107+
rtnl_lock();
108+
}
109+
110+
/*
 * Fallback used when CONFIG_DEBUG_NET_SMALL_RTNL is not set: releases only
 * the global RTNL; @net is ignored.
 */
static inline void rtnl_net_unlock(struct net *net)
111+
{
112+
rtnl_unlock();
113+
}
114+
#endif
115+
95116
static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
96117
{
97118
return rtnl_dereference(dev->ingress_queue);

include/net/net_namespace.h

+4
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,10 @@ struct net {
188188
#if IS_ENABLED(CONFIG_SMC)
189189
struct netns_smc smc;
190190
#endif
191+
#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
192+
/* Move to a better place when the config guard is removed. */
193+
struct mutex rtnl_mutex;
194+
#endif
191195
} __randomize_layout;
192196

193197
#include <linux/seq_file_net.h>

net/Kconfig.debug

+15
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,18 @@ config DEBUG_NET
2424
help
2525
Enable extra sanity checks in networking.
2626
This is mostly used by fuzzers, but is safe to select.
27+
28+
config DEBUG_NET_SMALL_RTNL
29+
bool "Add extra per-netns mutex inside RTNL"
30+
depends on DEBUG_KERNEL && NET && LOCK_DEBUGGING_SUPPORT
31+
select PROVE_LOCKING
32+
default n
33+
help
34+
rtnl_lock() is being replaced with rtnl_net_lock() that
35+
acquires the global RTNL and a small per-netns RTNL mutex.
36+
37+
During the conversion, rtnl_net_lock() just adds an extra
38+
mutex in every RTNL scope and slows down the operations.
39+
40+
Once the conversion completes, rtnl_lock() will be removed
41+
and rtnetlink will gain per-netns scalability.

net/core/net_namespace.c

+6
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,12 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_
334334
idr_init(&net->netns_ids);
335335
spin_lock_init(&net->nsid_lock);
336336
mutex_init(&net->ipv4.ra_mutex);
337+
338+
#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
339+
mutex_init(&net->rtnl_mutex);
340+
lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL);
341+
#endif
342+
337343
preinit_net_sysctl(net);
338344
}
339345

net/core/rtnetlink.c

+58
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,64 @@ bool lockdep_rtnl_is_held(void)
179179
EXPORT_SYMBOL(lockdep_rtnl_is_held);
180180
#endif /* #ifdef CONFIG_PROVE_LOCKING */
181181

182+
#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
183+
/*
 * __rtnl_net_lock - acquire the per-netns RTNL mutex of @net.
 *
 * Takes only net->rtnl_mutex; the global RTNL must already be held by the
 * caller, which ASSERT_RTNL() checks before the mutex is acquired.
 */
void __rtnl_net_lock(struct net *net)
184+
{
185+
ASSERT_RTNL();
186+
187+
mutex_lock(&net->rtnl_mutex);
188+
}
189+
EXPORT_SYMBOL(__rtnl_net_lock);
190+
191+
/*
 * __rtnl_net_unlock - release the per-netns RTNL mutex of @net.
 *
 * Drops only net->rtnl_mutex; the global RTNL is expected to still be held
 * (checked by ASSERT_RTNL()) and is left untouched.
 */
void __rtnl_net_unlock(struct net *net)
192+
{
193+
ASSERT_RTNL();
194+
195+
mutex_unlock(&net->rtnl_mutex);
196+
}
197+
EXPORT_SYMBOL(__rtnl_net_unlock);
198+
199+
/*
 * rtnl_net_lock - acquire the global RTNL, then the per-netns RTNL of @net.
 *
 * The global lock is taken first so that the ASSERT_RTNL() check inside
 * __rtnl_net_lock() is satisfied.
 */
void rtnl_net_lock(struct net *net)
200+
{
201+
rtnl_lock();
202+
__rtnl_net_lock(net);
203+
}
204+
EXPORT_SYMBOL(rtnl_net_lock);
205+
206+
/*
 * rtnl_net_unlock - release the per-netns RTNL of @net, then the global RTNL.
 *
 * Unlocks in the reverse order of rtnl_net_lock(): the per-netns mutex is
 * dropped while the global RTNL is still held.
 */
void rtnl_net_unlock(struct net *net)
207+
{
208+
__rtnl_net_unlock(net);
209+
rtnl_unlock();
210+
}
211+
EXPORT_SYMBOL(rtnl_net_unlock);
212+
213+
/*
 * rtnl_net_cmp_locks - order two network namespaces for nested locking.
 *
 * Returns 0 when both arguments are the same netns, -1 when @net_a orders
 * before @net_b and 1 when it orders after. init_net always orders first;
 * any other pair is ordered by the address of the struct net.
 */
static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
214+
{
215+
if (net_eq(net_a, net_b))
216+
return 0;
217+
218+
/* always init_net first */
219+
if (net_eq(net_a, &init_net))
220+
return -1;
221+
222+
if (net_eq(net_b, &init_net))
223+
return 1;
224+
225+
/* otherwise lock in ascending order */
226+
return net_a < net_b ? -1 : 1;
227+
}
228+
229+
/*
 * rtnl_net_lock_cmp_fn - lockdep comparison callback for per-netns RTNL.
 *
 * @a and @b are the dep_map members embedded in two struct net rtnl_mutex
 * fields; each is mapped back to its owning struct net with container_of()
 * and the pair is ordered by rtnl_net_cmp_locks().
 */
int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b)
230+
{
231+
const struct net *net_a, *net_b;
232+
233+
net_a = container_of(a, struct net, rtnl_mutex.dep_map);
234+
net_b = container_of(b, struct net, rtnl_mutex.dep_map);
235+
236+
return rtnl_net_cmp_locks(net_a, net_b);
237+
}
238+
#endif
239+
182240
static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
183241

184242
static inline int rtm_msgindex(int msgtype)

0 commit comments

Comments
 (0)