Skip to content

Commit ac74d25

Browse files
Cong Wang authored and Kernel Patches Daemon committed
net: introduce a new proto_ops ->read_skb()
Currently both splice() and sockmap use ->read_sock() to read skb from receive queue, but for sockmap we only read one entire skb at a time, so ->read_sock() is too conservative to use. Introduce a new proto_ops ->read_skb() which supports this semantic; with this we can finally pass the ownership of skb to recv actors. For non-TCP protocols, all ->read_sock() can be simply converted to ->read_skb(). Cc: Eric Dumazet <edumazet@google.com> Cc: John Fastabend <john.fastabend@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Cong Wang <cong.wang@bytedance.com>
1 parent 644dde0 commit ac74d25

File tree

9 files changed

+31
-47
lines changed

9 files changed

+31
-47
lines changed

include/linux/net.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ struct module;
152152
struct sk_buff;
153153
typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
154154
unsigned int, size_t);
155+
typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *);
156+
155157

156158
struct proto_ops {
157159
int family;
@@ -214,6 +216,8 @@ struct proto_ops {
214216
*/
215217
int (*read_sock)(struct sock *sk, read_descriptor_t *desc,
216218
sk_read_actor_t recv_actor);
219+
/* This is different from read_sock(), it reads an entire skb at a time. */
220+
int (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor);
217221
int (*sendpage_locked)(struct sock *sk, struct page *page,
218222
int offset, size_t size, int flags);
219223
int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,

include/net/tcp.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -669,8 +669,7 @@ void tcp_get_info(struct sock *, struct tcp_info *);
669669
/* Read 'sendfile()'-style from a TCP socket */
670670
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
671671
sk_read_actor_t recv_actor);
672-
int tcp_read_skb(struct sock *sk, read_descriptor_t *desc,
673-
sk_read_actor_t recv_actor);
672+
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
674673

675674
void tcp_initialize_rcv_mss(struct sock *sk);
676675

include/net/udp.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,8 +305,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
305305
struct sk_buff *skb);
306306
struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
307307
__be16 sport, __be16 dport);
308-
int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
309-
sk_read_actor_t recv_actor);
308+
int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
310309

311310
/* UDP uses skb->dev_scratch to cache as much information as possible and avoid
312311
* possibly multiple cache miss on dequeue()

net/core/skmsg.c

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,21 +1160,17 @@ static void sk_psock_done_strp(struct sk_psock *psock)
11601160
}
11611161
#endif /* CONFIG_BPF_STREAM_PARSER */
11621162

1163-
static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
1164-
unsigned int offset, size_t orig_len)
1163+
static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
11651164
{
1166-
struct sock *sk = (struct sock *)desc->arg.data;
11671165
struct sk_psock *psock;
11681166
struct bpf_prog *prog;
11691167
int ret = __SK_DROP;
1170-
int len = orig_len;
1168+
int len = skb->len;
11711169

11721170
/* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
11731171
skb = skb_clone(skb, GFP_ATOMIC);
1174-
if (!skb) {
1175-
desc->error = -ENOMEM;
1172+
if (!skb)
11761173
return 0;
1177-
}
11781174

11791175
rcu_read_lock();
11801176
psock = sk_psock(sk);
@@ -1204,16 +1200,10 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
12041200
static void sk_psock_verdict_data_ready(struct sock *sk)
12051201
{
12061202
struct socket *sock = sk->sk_socket;
1207-
read_descriptor_t desc;
12081203

1209-
if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
1204+
if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
12101205
return;
1211-
1212-
desc.arg.data = sk;
1213-
desc.error = 0;
1214-
desc.count = 1;
1215-
1216-
sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
1206+
sock->ops->read_skb(sk, sk_psock_verdict_recv);
12171207
}
12181208

12191209
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)

net/ipv4/af_inet.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,7 @@ const struct proto_ops inet_stream_ops = {
10401040
.sendpage = inet_sendpage,
10411041
.splice_read = tcp_splice_read,
10421042
.read_sock = tcp_read_sock,
1043+
.read_skb = tcp_read_skb,
10431044
.sendmsg_locked = tcp_sendmsg_locked,
10441045
.sendpage_locked = tcp_sendpage_locked,
10451046
.peek_len = tcp_peek_len,
@@ -1067,7 +1068,7 @@ const struct proto_ops inet_dgram_ops = {
10671068
.setsockopt = sock_common_setsockopt,
10681069
.getsockopt = sock_common_getsockopt,
10691070
.sendmsg = inet_sendmsg,
1070-
.read_sock = udp_read_sock,
1071+
.read_skb = udp_read_skb,
10711072
.recvmsg = inet_recvmsg,
10721073
.mmap = sock_no_mmap,
10731074
.sendpage = inet_sendpage,

net/ipv4/tcp.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1709,8 +1709,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
17091709
}
17101710
EXPORT_SYMBOL(tcp_read_sock);
17111711

1712-
int tcp_read_skb(struct sock *sk, read_descriptor_t *desc,
1713-
sk_read_actor_t recv_actor)
1712+
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
17141713
{
17151714
struct tcp_sock *tp = tcp_sk(sk);
17161715
u32 seq = tp->copied_seq;
@@ -1725,7 +1724,7 @@ int tcp_read_skb(struct sock *sk, read_descriptor_t *desc,
17251724
int used;
17261725

17271726
__skb_unlink(skb, &sk->sk_receive_queue);
1728-
used = recv_actor(desc, skb, 0, skb->len);
1727+
used = recv_actor(sk, skb);
17291728
if (used <= 0) {
17301729
if (!copied)
17311730
copied = used;
@@ -1740,9 +1739,7 @@ int tcp_read_skb(struct sock *sk, read_descriptor_t *desc,
17401739
break;
17411740
}
17421741
kfree_skb(skb);
1743-
if (!desc->count)
1744-
break;
1745-
WRITE_ONCE(tp->copied_seq, seq);
1742+
break;
17461743
}
17471744
WRITE_ONCE(tp->copied_seq, seq);
17481745

net/ipv4/udp.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1795,8 +1795,7 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
17951795
}
17961796
EXPORT_SYMBOL(__skb_recv_udp);
17971797

1798-
int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
1799-
sk_read_actor_t recv_actor)
1798+
int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
18001799
{
18011800
int copied = 0;
18021801

@@ -1818,7 +1817,7 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
18181817
continue;
18191818
}
18201819

1821-
used = recv_actor(desc, skb, 0, skb->len);
1820+
used = recv_actor(sk, skb);
18221821
if (used <= 0) {
18231822
if (!copied)
18241823
copied = used;
@@ -1829,13 +1828,12 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
18291828
}
18301829

18311830
kfree_skb(skb);
1832-
if (!desc->count)
1833-
break;
1831+
break;
18341832
}
18351833

18361834
return copied;
18371835
}
1838-
EXPORT_SYMBOL(udp_read_sock);
1836+
EXPORT_SYMBOL(udp_read_skb);
18391837

18401838
/*
18411839
* This should be easy, if there is something there we

net/ipv6/af_inet6.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,7 @@ const struct proto_ops inet6_stream_ops = {
701701
.sendpage_locked = tcp_sendpage_locked,
702702
.splice_read = tcp_splice_read,
703703
.read_sock = tcp_read_sock,
704+
.read_skb = tcp_read_skb,
704705
.peek_len = tcp_peek_len,
705706
#ifdef CONFIG_COMPAT
706707
.compat_ioctl = inet6_compat_ioctl,
@@ -726,7 +727,7 @@ const struct proto_ops inet6_dgram_ops = {
726727
.getsockopt = sock_common_getsockopt, /* ok */
727728
.sendmsg = inet6_sendmsg, /* retpoline's sake */
728729
.recvmsg = inet6_recvmsg, /* retpoline's sake */
729-
.read_sock = udp_read_sock,
730+
.read_skb = udp_read_skb,
730731
.mmap = sock_no_mmap,
731732
.sendpage = sock_no_sendpage,
732733
.set_peek_off = sk_set_peek_off,

net/unix/af_unix.c

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -741,10 +741,8 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
741741
unsigned int flags);
742742
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
743743
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
744-
static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
745-
sk_read_actor_t recv_actor);
746-
static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
747-
sk_read_actor_t recv_actor);
744+
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
745+
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
748746
static int unix_dgram_connect(struct socket *, struct sockaddr *,
749747
int, int);
750748
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -798,7 +796,7 @@ static const struct proto_ops unix_stream_ops = {
798796
.shutdown = unix_shutdown,
799797
.sendmsg = unix_stream_sendmsg,
800798
.recvmsg = unix_stream_recvmsg,
801-
.read_sock = unix_stream_read_sock,
799+
.read_skb = unix_stream_read_skb,
802800
.mmap = sock_no_mmap,
803801
.sendpage = unix_stream_sendpage,
804802
.splice_read = unix_stream_splice_read,
@@ -823,7 +821,7 @@ static const struct proto_ops unix_dgram_ops = {
823821
.listen = sock_no_listen,
824822
.shutdown = unix_shutdown,
825823
.sendmsg = unix_dgram_sendmsg,
826-
.read_sock = unix_read_sock,
824+
.read_skb = unix_read_skb,
827825
.recvmsg = unix_dgram_recvmsg,
828826
.mmap = sock_no_mmap,
829827
.sendpage = sock_no_sendpage,
@@ -2487,8 +2485,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si
24872485
return __unix_dgram_recvmsg(sk, msg, size, flags);
24882486
}
24892487

2490-
static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
2491-
sk_read_actor_t recv_actor)
2488+
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
24922489
{
24932490
int copied = 0;
24942491

@@ -2503,7 +2500,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
25032500
if (!skb)
25042501
return err;
25052502

2506-
used = recv_actor(desc, skb, 0, skb->len);
2503+
used = recv_actor(sk, skb);
25072504
if (used <= 0) {
25082505
if (!copied)
25092506
copied = used;
@@ -2514,8 +2511,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
25142511
}
25152512

25162513
kfree_skb(skb);
2517-
if (!desc->count)
2518-
break;
2514+
break;
25192515
}
25202516

25212517
return copied;
@@ -2650,13 +2646,12 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
26502646
}
26512647
#endif
26522648

2653-
static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
2654-
sk_read_actor_t recv_actor)
2649+
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
26552650
{
26562651
if (unlikely(sk->sk_state != TCP_ESTABLISHED))
26572652
return -ENOTCONN;
26582653

2659-
return unix_read_sock(sk, desc, recv_actor);
2654+
return unix_read_skb(sk, recv_actor);
26602655
}
26612656

26622657
static int unix_stream_read_generic(struct unix_stream_read_state *state,

0 commit comments

Comments
 (0)