-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinux-2.6-525-sknid-elevator.patch
240 lines (220 loc) · 7.51 KB
/
linux-2.6-525-sknid-elevator.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
Index: linux-2.6.27.y/include/linux/netdevice.h
===================================================================
--- linux-2.6.27.y.orig/include/linux/netdevice.h
+++ linux-2.6.27.y/include/linux/netdevice.h
@@ -857,6 +857,7 @@ static inline void netif_napi_del(struct
struct packet_type {
__be16 type; /* This is really htons(ether_type). */
struct net_device *dev; /* NULL is wildcarded here */
+ unsigned char sknid_elevator;
int (*func) (struct sk_buff *,
struct net_device *,
struct packet_type *,
Index: linux-2.6.27.y/net/core/dev.c
===================================================================
--- linux-2.6.27.y.orig/net/core/dev.c
+++ linux-2.6.27.y/net/core/dev.c
@@ -99,6 +99,8 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
@@ -1318,7 +1320,7 @@ static void dev_queue_xmit_nit(struct sk
if ((ptype->dev == dev || !ptype->dev) &&
(ptype->af_packet_priv == NULL ||
(struct sock *)ptype->af_packet_priv != skb->sk)) {
- struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2)
break;
@@ -2170,6 +2172,10 @@ void netif_nit_deliver(struct sk_buff *s
rcu_read_unlock();
}
+/* The code already makes the assumption that packet handlers run
+ * sequentially on the same CPU. -Sapan */
+DEFINE_PER_CPU(int, sknid_elevator) = 0;
+
/**
* netif_receive_skb - process receive buffer from network
* @skb: buffer to process
@@ -2191,8 +2197,11 @@ int netif_receive_skb(struct sk_buff *sk
struct net_device *orig_dev;
struct net_device *null_or_orig;
int ret = NET_RX_DROP;
+ int *cur_elevator = &__get_cpu_var(sknid_elevator);
__be16 type;
+ *cur_elevator = 0;
+
if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
return NET_RX_SUCCESS;
@@ -2272,7 +2281,27 @@ ncls:
}
if (pt_prev) {
+ /* At this point, cur_elevator may be -2 or a positive value, in
+ * case a previous protocol handler marked it */
+ if (*cur_elevator) {
+ atomic_inc(&skb->users);
+ }
+
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+
+ if ((*cur_elevator)>0) {
+ skb->skb_tag = *cur_elevator;
+ list_for_each_entry_rcu(ptype, &ptype_all, list) {
+ if ((!ptype->dev || ptype->dev == skb->dev) && (ptype->sknid_elevator)) {
+ ret = deliver_skb(skb, ptype, orig_dev);
+ }
+ }
+ }
+
+ if (*cur_elevator) {
+ /* We have a packet */
+ kfree_skb(skb);
+ }
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
@@ -4895,6 +4924,7 @@ EXPORT_SYMBOL(unregister_netdevice_notif
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);
+EXPORT_PER_CPU_SYMBOL(sknid_elevator);
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
Index: linux-2.6.27.y/net/packet/af_packet.c
===================================================================
--- linux-2.6.27.y.orig/net/packet/af_packet.c
+++ linux-2.6.27.y/net/packet/af_packet.c
@@ -77,6 +77,7 @@
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/vs_network.h>
#include <linux/mutex.h>
#ifdef CONFIG_INET
@@ -278,10 +279,53 @@ static const struct proto_ops packet_ops
static const struct proto_ops packet_ops_spkt;
+extern DEFINE_PER_CPU(int, sknid_elevator);
+
+static inline unsigned int slice_check_and_elevate(struct sk_buff *skb, struct sock *sk) {
+ /* This mechanism is quite involved, and caused us a lot of pain
+ * including crashes and packet loss during the 4.2 rollout. This
+ * function decides if a slice is allowed to see a given packet.
+ * Unfortunately, the first time it is invoked for a packet it does not
+ * have enough information to make this call, since xt_MARK has not had
+ * a chance to tag it with the slice id. There is also no way of
+ * passing state between xt_MARK and this function through a packet --
+ * because the skb gets cloned quite a few times between these two
+ * points. I'd rather not use skb_shared_info because it's treated as
+ * a blob of memory, and so it would be quite hard to maintain.
+ *
+ * What we do is to keep a global variable (per CPU) that transfers the
+ * required state between xt_MARK and af_packet.c. As an optimization,
+ * this state transfer and the step that follows is only executed for
+ * packets that first get dropped here. When we drop a packet, we mark
+ * it for 'elevation' (that's what this trick is called). When xt_MARK
+ * tags the packet with the right slice, it intercepts this mark and
+ * sets the value of sknid_elevator. Next, the packet is sent back here
+ * for a second round, this time with the xid tag set.
+ */
+
+ int *elevator=&__get_cpu_var(sknid_elevator);
+ int tag = skb->skb_tag;
+
+ if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
+ if (skb->pkt_type==PACKET_HOST) {
+ *elevator=-2; /* Rejecting this packet. Mark it for elevation in xt_MARK */
+ }
+ return 0;
+ }
+ else if (!sk->sk_nx_info && (*elevator>0)) {
+ /* Root has already seen this packet once, since it has been elevated */
+ return 0;
+ }
+
+ return 1;
+}
+
static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
struct sock *sk;
struct sockaddr_pkt *spkt;
+ int tag = skb->skb_tag;
+
/*
* When we registered the protocol we saved the socket in the data
@@ -301,6 +345,16 @@ static int packet_rcv_spkt(struct sk_buf
* so that this procedure is noop.
*/
+ /*
+ * (18:05:41) daniel_hozac: where?
+ * (18:05:58) daniel_hozac: we already have filters on PF_PACKET, don't we?
+ * (18:05:58) er: in packet_rcv_skpt
+ * (18:07:33) daniel_hozac: oh, that's evil.
+ */
+
+ if (!slice_check_and_elevate(skb, sk))
+ return 0;
+
if (skb->pkt_type == PACKET_LOOPBACK)
goto out;
@@ -359,6 +413,9 @@ static int packet_sendmsg_spkt(struct ki
__be16 proto=0;
int err;
+ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
+ return -EPERM;
+
/*
* Get and verify the address.
*/
@@ -451,11 +508,16 @@ out_unlock:
return err;
}
+
+
static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
unsigned int res)
{
struct sk_filter *filter;
+ if (!slice_check_and_elevate(skb, sk))
+ return 0;
+
rcu_read_lock_bh();
filter = rcu_dereference(sk->sk_filter);
if (filter != NULL)
@@ -775,6 +837,9 @@ static int packet_sendmsg(struct kiocb *
unsigned char *addr;
int ifindex, err, reserve = 0;
+ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
+ return -EPERM;
+
/*
* Get and verify the address.
*/
@@ -941,6 +1006,7 @@ static int packet_do_bind(struct sock *s
po->num = protocol;
po->prot_hook.type = protocol;
+ po->prot_hook.sknid_elevator = 1;
po->prot_hook.dev = dev;
po->ifindex = dev ? dev->ifindex : 0;
@@ -1039,8 +1105,9 @@ static int packet_create(struct net *net
__be16 proto = (__force __be16)protocol; /* weird, but documented */
int err;
- if (!capable(CAP_NET_RAW))
+ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET))
return -EPERM;
+
if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
sock->type != SOCK_PACKET)
return -ESOCKTNOSUPPORT;
@@ -1072,6 +1139,7 @@ static int packet_create(struct net *net
spin_lock_init(&po->bind_lock);
mutex_init(&po->pg_vec_lock);
po->prot_hook.func = packet_rcv;
+ po->prot_hook.sknid_elevator = 1;
if (sock->type == SOCK_PACKET)
po->prot_hook.func = packet_rcv_spkt;