Skip to content

Commit 37734aa

Browse files
authored
L3mdev cgroup (#73)
* add driver-l3mdev-cgroup patch Signed-off-by: Guohan Lu <gulv@microsoft.com> * update abiname to 8-1 Signed-off-by: Guohan Lu <gulv@microsoft.com>
1 parent d631eeb commit 37734aa

6 files changed

+386
-1
lines changed

Makefile

+4-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
SHELL = /bin/bash
33
.SHELLFLAGS += -e
44

5-
KVERSION_SHORT ?= 4.9.0-8
5+
KERNEL_ABI_MINOR_VERSION = 1
6+
KVERSION_SHORT ?= 4.9.0-8-$(KERNEL_ABI_MINOR_VERSION)
67
KVERSION ?= $(KVERSION_SHORT)-amd64
78
KERNEL_VERSION ?= 4.9.110
89
KERNEL_SUBVERSION ?= 3+deb9u6
@@ -79,6 +80,8 @@ $(addprefix $(DEST)/, $(MAIN_TARGET)): $(DEST)/% :
7980
git add debian/build/build_amd64_none_amd64/.config -f
8081
git add debian/config.defines.dump -f
8182
git add debian/control -f
83+
git add debian/rules.gen -f
84+
git add debian/tests/control -f
8285
git commit -m "unmodified debian source"
8386

8487
# Learning new git repo head (above commit) by calling stg repair.

patch/config-l3mdev-cgroup.patch

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
enable CONFIG_CGROUP_L3MDEV=y
2+
3+
From: Guohan Lu <gulv@microsoft.com>
4+
5+
6+
---
7+
debian/build/build_amd64_none_amd64/.config | 1 +
8+
1 file changed, 1 insertion(+)
9+
10+
diff --git a/debian/build/build_amd64_none_amd64/.config b/debian/build/build_amd64_none_amd64/.config
11+
index b7bcf15..a8d64a7 100644
12+
--- a/debian/build/build_amd64_none_amd64/.config
13+
+++ b/debian/build/build_amd64_none_amd64/.config
14+
@@ -1493,6 +1492,7 @@ CONFIG_MPLS_IPTUNNEL=m
15+
# CONFIG_HSR is not set
16+
# CONFIG_NET_SWITCHDEV is not set
17+
CONFIG_NET_L3_MASTER_DEV=y
18+
+CONFIG_CGROUP_L3MDEV=y
19+
# CONFIG_NET_NCSI is not set
20+
CONFIG_RPS=y
21+
CONFIG_RFS_ACCEL=y

patch/driver-l3mdev-cgroup.patch

+338
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,338 @@
1+
Add cgroup to associate tasks with L3 networking domains. AF_INET{6}
2+
sockets opened by tasks associated with an l3mdev cgroup are bound to
3+
the associated master device when the socket is created. This allows a
4+
user to run a command (and its children) within an L3 networking context.
5+
6+
The master-device for an l3mdev cgroup must be an L3 master device
7+
(e.g., VRF), and it must be set before attaching tasks to the cgroup. Once
8+
set the master-device can not change. Nested l3mdev cgroups are not
9+
supported. The root (aka default) l3mdev cgroup can not be bound to a
10+
master device.
11+
12+
Example:
13+
ip link add vrf-red type vrf table vrf-red
14+
ip link set dev vrf-red up
15+
ip link set dev eth1 master vrf-red
16+
17+
cgcreate -g l3mdev:vrf-red
18+
cgset -r l3mdev.master-device=vrf-red vrf-red
19+
cgexec -g l3mdev:vrf-red bash
20+
21+
At this point the current shell and its child processes are attached to
22+
the vrf-red L3 domain. Any AF_INET and AF_INET6 sockets opened by the
23+
tasks are bound to the vrf-red device.
24+
25+
TO-DO:
26+
- how to auto-create the cgroup when a VRF device is created and auto-delete
27+
it when the VRF device is destroyed
28+
29+
Signed-off-by: David Ahern <dsa-qUQiAmfTcIp+XZJcv9eMoEEOCMrvLtNR@public.gmane.org>
30+
---
31+
include/linux/cgroup_subsys.h | 4 +
32+
include/net/l3mdev_cgroup.h | 27 ++++++
33+
net/core/sock.c | 2
34+
net/l3mdev/Kconfig | 12 +++
35+
net/l3mdev/Makefile | 1
36+
net/l3mdev/l3mdev_cgroup.c | 195 +++++++++++++++++++++++++++++++++++++++++
37+
6 files changed, 241 insertions(+)
38+
create mode 100644 include/net/l3mdev_cgroup.h
39+
create mode 100644 net/l3mdev/l3mdev_cgroup.c
40+
41+
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
42+
index 0df0336..72c220a 100644
43+
--- a/include/linux/cgroup_subsys.h
44+
+++ b/include/linux/cgroup_subsys.h
45+
@@ -48,6 +48,10 @@ SUBSYS(perf_event)
46+
SUBSYS(net_prio)
47+
#endif
48+
49+
+#if IS_ENABLED(CONFIG_CGROUP_L3MDEV)
50+
+SUBSYS(l3mdev)
51+
+#endif
52+
+
53+
#if IS_ENABLED(CONFIG_CGROUP_HUGETLB)
54+
SUBSYS(hugetlb)
55+
#endif
56+
diff --git a/include/net/l3mdev_cgroup.h b/include/net/l3mdev_cgroup.h
57+
new file mode 100644
58+
index 0000000..7d2bef1
59+
--- /dev/null
60+
+++ b/include/net/l3mdev_cgroup.h
61+
@@ -0,0 +1,27 @@
62+
+/*
63+
+ * l3mdev_cgroup.h Control Group for L3 Master Device
64+
+ *
65+
+ * Copyright (c) 2015 Cumulus Networks. All rights reserved.
66+
+ * Copyright (c) 2015 David Ahern <dsa-qUQiAmfTcIp+XZJcv9eMoEEOCMrvLtNR@public.gmane.org>
67+
+ *
68+
+ * This program is free software; you can redistribute it and/or modify
69+
+ * it under the terms of the GNU General Public License as published by
70+
+ * the Free Software Foundation; either version 2 of the License, or
71+
+ * (at your option) any later version.
72+
+ */
73+
+
74+
+#ifndef _L3MDEV_CGROUP_H
75+
+#define _L3MDEV_CGROUP_H
76+
+
77+
+#if IS_ENABLED(CONFIG_CGROUP_L3MDEV)
78+
+
79+
+void sock_update_l3mdev(struct sock *sk);
80+
+
81+
+#else /* !CONFIG_CGROUP_L3MDEV */
82+
+
83+
+static inline void sock_update_l3mdev(struct sock *sk)
84+
+{
85+
+}
86+
+
87+
+#endif /* CONFIG_CGROUP_L3MDEV */
88+
+#endif /* _L3MDEV_CGROUP_H */
89+
diff --git a/net/core/sock.c b/net/core/sock.c
90+
index 1c4c434..ebb25ca 100644
91+
--- a/net/core/sock.c
92+
+++ b/net/core/sock.c
93+
@@ -131,6 +131,7 @@
94+
#include <linux/ipsec.h>
95+
#include <net/cls_cgroup.h>
96+
#include <net/netprio_cgroup.h>
97+
+#include <net/l3mdev_cgroup.h>
98+
#include <linux/sock_diag.h>
99+
100+
#include <linux/filter.h>
101+
@@ -1402,6 +1403,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
102+
cgroup_sk_alloc(&sk->sk_cgrp_data);
103+
sock_update_classid(&sk->sk_cgrp_data);
104+
sock_update_netprioidx(&sk->sk_cgrp_data);
105+
+ sock_update_l3mdev(sk);
106+
}
107+
108+
return sk;
109+
diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig
110+
index 5d47325..3142d81 100644
111+
--- a/net/l3mdev/Kconfig
112+
+++ b/net/l3mdev/Kconfig
113+
@@ -8,3 +8,15 @@ config NET_L3_MASTER_DEV
114+
---help---
115+
This module provides glue between core networking code and device
116+
drivers to support L3 master devices like VRF.
117+
+
118+
+config CGROUP_L3MDEV
119+
+ bool "L3 Master Device cgroup"
120+
+ depends on CGROUPS
121+
+ depends on NET_L3_MASTER_DEV
122+
+ ---help---
123+
+ Cgroup subsystem for assigning processes to an L3 domain.
124+
+ When a process is assigned to an l3mdev domain all AF_INET and
125+
+ AF_INET6 sockets opened by the process are bound to the L3 master
126+
+ device.
127+
+
128+
+
129+
diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile
130+
index 84a53a6..ae74eba 100644
131+
--- a/net/l3mdev/Makefile
132+
+++ b/net/l3mdev/Makefile
133+
@@ -3,3 +3,4 @@
134+
#
135+
136+
obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o
137+
+obj-$(CONFIG_CGROUP_L3MDEV) += l3mdev_cgroup.o
138+
diff --git a/net/l3mdev/l3mdev_cgroup.c b/net/l3mdev/l3mdev_cgroup.c
139+
new file mode 100644
140+
index 0000000..b5fea03
141+
--- /dev/null
142+
+++ b/net/l3mdev/l3mdev_cgroup.c
143+
@@ -0,0 +1,195 @@
144+
+/*
145+
+ * net/l3mdev/l3mdev_cgroup.c Control Group for L3 Master Devices
146+
+ *
147+
+ * Copyright (c) 2015 Cumulus Networks. All rights reserved.
148+
+ * Copyright (c) 2015 David Ahern <dsa-qUQiAmfTcIp+XZJcv9eMoEEOCMrvLtNR@public.gmane.org>
149+
+ *
150+
+ * This program is free software; you can redistribute it and/or modify
151+
+ * it under the terms of the GNU General Public License as published by
152+
+ * the Free Software Foundation; either version 2 of the License, or
153+
+ * (at your option) any later version.
154+
+ */
155+
+
156+
+#include <linux/module.h>
157+
+#include <linux/slab.h>
158+
+#include <linux/types.h>
159+
+#include <linux/string.h>
160+
+#include <linux/cgroup.h>
161+
+#include <net/sock.h>
162+
+#include <net/l3mdev_cgroup.h>
163+
+
164+
+struct l3mdev_cgroup {
165+
+ struct cgroup_subsys_state css;
166+
+ struct net *net;
167+
+ int dev_idx;
168+
+};
169+
+
170+
+static inline struct l3mdev_cgroup *css_l3mdev(struct cgroup_subsys_state *css)
171+
+{
172+
+ return css ? container_of(css, struct l3mdev_cgroup, css) : NULL;
173+
+}
174+
+
175+
+static void l3mdev_set_bound_dev(struct sock *sk)
176+
+{
177+
+ struct task_struct *tsk = current;
178+
+ struct l3mdev_cgroup *l3mdev_cgrp;
179+
+
180+
+ rcu_read_lock();
181+
+
182+
+ l3mdev_cgrp = css_l3mdev(task_css(tsk, l3mdev_cgrp_id));
183+
+ if (l3mdev_cgrp && l3mdev_cgrp->dev_idx)
184+
+ sk->sk_bound_dev_if = l3mdev_cgrp->dev_idx;
185+
+
186+
+ rcu_read_unlock();
187+
+}
188+
+
189+
+void sock_update_l3mdev(struct sock *sk)
190+
+{
191+
+ switch (sk->sk_family) {
192+
+ case AF_INET:
193+
+ case AF_INET6:
194+
+ l3mdev_set_bound_dev(sk);
195+
+ break;
196+
+ }
197+
+}
198+
+
199+
+static bool is_root_cgroup(struct cgroup_subsys_state *css)
200+
+{
201+
+ return !css || !css->parent;
202+
+}
203+
+
204+
+static struct cgroup_subsys_state *
205+
+l3mdev_css_alloc(struct cgroup_subsys_state *parent_css)
206+
+{
207+
+ struct l3mdev_cgroup *l3mdev_cgrp;
208+
+
209+
+ /* nested l3mdev domains are not supported */
210+
+ if (!is_root_cgroup(parent_css))
211+
+ return ERR_PTR(-EINVAL);
212+
+
213+
+ l3mdev_cgrp = kzalloc(sizeof(*l3mdev_cgrp), GFP_KERNEL);
214+
+ if (!l3mdev_cgrp)
215+
+ return ERR_PTR(-ENOMEM);
216+
+
217+
+ return &l3mdev_cgrp->css;
218+
+}
219+
+
220+
+static int l3mdev_css_online(struct cgroup_subsys_state *css)
221+
+{
222+
+ return 0;
223+
+}
224+
+
225+
+static void l3mdev_css_free(struct cgroup_subsys_state *css)
226+
+{
227+
+ kfree(css_l3mdev(css));
228+
+}
229+
+
230+
+static int l3mdev_read(struct seq_file *sf, void *v)
231+
+{
232+
+ struct cgroup_subsys_state *css = seq_css(sf);
233+
+ struct l3mdev_cgroup *l3mdev_cgrp = css_l3mdev(css);
234+
+
235+
+ if (!l3mdev_cgrp)
236+
+ return -EINVAL;
237+
+
238+
+ if (l3mdev_cgrp->net) {
239+
+ struct net_device *dev;
240+
+
241+
+ dev = dev_get_by_index(l3mdev_cgrp->net, l3mdev_cgrp->dev_idx);
242+
+
243+
+ seq_printf(sf, "net[%u]: device index %d ==> %s\n",
244+
+ l3mdev_cgrp->net->ns.inum, l3mdev_cgrp->dev_idx,
245+
+ dev ? dev->name : "<none>");
246+
+
247+
+ if (dev)
248+
+ dev_put(dev);
249+
+ }
250+
+ return 0;
251+
+}
252+
+
253+
+static ssize_t l3mdev_write(struct kernfs_open_file *of,
254+
+ char *buf, size_t nbytes, loff_t off)
255+
+{
256+
+ struct cgroup_subsys_state *css = of_css(of);
257+
+ struct l3mdev_cgroup *l3mdev_cgrp = css_l3mdev(css);
258+
+ struct net *net = current->nsproxy->net_ns;
259+
+ struct net_device *dev;
260+
+ char name[IFNAMSIZ + 1]; /* sscanf stores up to IFNAMSIZ chars plus NUL */
261+
+ int rc = -EINVAL;
262+
+
263+
+ /* once the master device is set it can not be changed; the
264+
+ * cgroup must be deleted and recreated to bind a new device
265+
+ */
266+
+ if (l3mdev_cgrp->dev_idx)
267+
+ goto out;
268+
+
269+
+ /* root cgroup does not bind to an L3 domain */
270+
+ if (is_root_cgroup(css))
271+
+ goto out;
272+
+
273+
+ if (sscanf(buf, "%" __stringify(IFNAMSIZ) "s", name) != 1)
274+
+ goto out;
275+
+
276+
+ dev = dev_get_by_name(net, name);
277+
+ if (!dev) {
278+
+ rc = -ENODEV;
279+
+ goto out;
280+
+ }
281+
+
282+
+ if (netif_is_l3_master(dev)) {
283+
+ l3mdev_cgrp->net = net;
284+
+ l3mdev_cgrp->dev_idx = dev->ifindex;
285+
+ rc = 0;
286+
+ }
287+
+
288+
+ dev_put(dev);
289+
+out:
290+
+ return rc ? : nbytes;
291+
+}
292+
+
293+
+/* ensure master device is set for non-root cgroups before tasks can be added */
294+
+static int l3mdev_can_attach(struct cgroup_taskset *tset)
295+
+{
296+
+ struct cgroup_subsys_state *dst_css;
297+
+ struct task_struct *tsk;
298+
+ int rc = 0;
299+
+
300+
+ cgroup_taskset_for_each(tsk, dst_css, tset) {
301+
+ struct l3mdev_cgroup *l3mdev_cgrp;
302+
+
303+
+ l3mdev_cgrp = css_l3mdev(dst_css);
304+
+ if (!is_root_cgroup(dst_css) && !l3mdev_cgrp->dev_idx) {
305+
+ rc = -ENODEV;
306+
+ break;
307+
+ }
308+
+ }
309+
+
310+
+ return rc;
311+
+}
312+
+
313+
+static struct cftype ss_files[] = {
314+
+ {
315+
+ .name = "master-device",
316+
+ .seq_show = l3mdev_read,
317+
+ .write = l3mdev_write,
318+
+ },
319+
+ { } /* terminate */
320+
+};
321+
+
322+
+struct cgroup_subsys l3mdev_cgrp_subsys = {
323+
+ .css_alloc = l3mdev_css_alloc,
324+
+ .css_online = l3mdev_css_online,
325+
+ .css_free = l3mdev_css_free,
326+
+ .can_attach = l3mdev_can_attach,
327+
+ .legacy_cftypes = ss_files,
328+
+};
329+
+
330+
+static int __init init_cgroup_l3mdev(void)
331+
+{
332+
+ return 0;
333+
+}
334+
+
335+
+subsys_initcall(init_cgroup_l3mdev);
336+
+MODULE_AUTHOR("David Ahern");
337+
+MODULE_DESCRIPTION("Control Group for L3 Networking Domains");
338+
+MODULE_LICENSE("GPL");

0 commit comments

Comments
 (0)