Skip to content

Commit

Permalink
sock: Remove ->sendpage*() in favour of sendmsg(MSG_SPLICE_PAGES)
Browse files Browse the repository at this point in the history
Remove ->sendpage() and ->sendpage_locked().  sendmsg() with
MSG_SPLICE_PAGES should be used instead.  This allows multiple pages and
multipage folios to be passed through.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de> # for net/can
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
cc: linux-afs@lists.infradead.org
cc: mptcp@lists.linux.dev
cc: rds-devel@oss.oracle.com
cc: tipc-discussion@lists.sourceforge.net
cc: virtualization@lists.linux-foundation.org
Link: https://lore.kernel.org/r/20230623225513.2732256-16-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
dhowells authored and kuba-moo committed Jun 24, 2023
1 parent e52828c commit dc97391
Show file tree
Hide file tree
Showing 66 changed files with 20 additions and 442 deletions.
10 changes: 5 additions & 5 deletions Documentation/bpf/map_sockmap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,11 @@ offsets into ``msg``, respectively.
If a program of type ``BPF_PROG_TYPE_SK_MSG`` is run on a ``msg`` it can only
parse data that the (``data``, ``data_end``) pointers have already consumed.
For ``sendmsg()`` hooks this is likely the first scatterlist element. But for
calls relying on the ``sendpage`` handler (e.g., ``sendfile()``) this will be
the range (**0**, **0**) because the data is shared with user space and by
default the objective is to avoid allowing user space to modify data while (or
after) BPF verdict is being decided. This helper can be used to pull in data
and to set the start and end pointers to given values. Data will be copied if
calls relying on MSG_SPLICE_PAGES (e.g., ``sendfile()``) this will be the
range (**0**, **0**) because the data is shared with user space and by default
the objective is to avoid allowing user space to modify data while (or after)
BPF verdict is being decided. This helper can be used to pull in data and to
set the start and end pointers to given values. Data will be copied if
necessary (i.e., if data was not linear and if start and end pointers do not
point to the same chunk).

Expand Down
2 changes: 0 additions & 2 deletions Documentation/filesystems/locking.rst
Original file line number Diff line number Diff line change
Expand Up @@ -521,8 +521,6 @@ prototypes::
int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long,
unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
Expand Down
1 change: 0 additions & 1 deletion Documentation/filesystems/vfs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1086,7 +1086,6 @@ This describes how the VFS can manipulate an open file. As of kernel
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*flock) (struct file *, int, struct file_lock *);
Expand Down
4 changes: 2 additions & 2 deletions Documentation/networking/scaling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,8 @@ a single application thread handles flows with many different flow hashes.
rps_sock_flow_table is a global flow table that contains the *desired* CPU
for flows: the CPU that is currently processing the flow in userspace.
Each table value is a CPU index that is updated during calls to recvmsg
and sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage()
and tcp_splice_read()).
and sendmsg (specifically, inet_recvmsg(), inet_sendmsg() and
tcp_splice_read()).

When the scheduler moves a thread to a new CPU while it has outstanding
receive packets on the old CPU, packets may arrive out of order. To
Expand Down
28 changes: 0 additions & 28 deletions crypto/af_alg.c
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,6 @@ static const struct proto_ops alg_proto_ops = {
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,

Expand Down Expand Up @@ -1106,33 +1105,6 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
}
EXPORT_SYMBOL_GPL(af_alg_sendmsg);

/**
* af_alg_sendpage - sendpage system call handler
* @sock: socket of connection to user space to write to
* @page: data to send
* @offset: offset into page to begin sending
* @size: length of data
* @flags: message send/receive flags
*
* This is a generic implementation of sendpage to fill ctx->tsgl_list.
*/
ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, int flags)
{
struct bio_vec bvec;
struct msghdr msg = {
.msg_flags = flags | MSG_SPLICE_PAGES,
};

if (flags & MSG_SENDPAGE_NOTLAST)
msg.msg_flags |= MSG_MORE;

bvec_set_page(&bvec, page, size, offset);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
return sock_sendmsg(sock, &msg);
}
EXPORT_SYMBOL_GPL(af_alg_sendpage);

/**
* af_alg_free_resources - release resources required for crypto request
* @areq: Request holding the TX and RX SGL
Expand Down
22 changes: 4 additions & 18 deletions crypto/algif_aead.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
* The following concept of the memory management is used:
*
* The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
* filled by user space with the data submitted via sendpage. Filling up
* the TX SGL does not cause a crypto operation -- the data will only be
* tracked by the kernel. Upon receipt of one recvmsg call, the caller must
* provide a buffer which is tracked with the RX SGL.
* filled by user space with the data submitted via sendmsg (maybe with
* MSG_SPLICE_PAGES). Filling up the TX SGL does not cause a crypto operation
* -- the data will only be tracked by the kernel. Upon receipt of one recvmsg
* call, the caller must provide a buffer which is tracked with the RX SGL.
*
* During the processing of the recvmsg operation, the cipher request is
* allocated and prepared. As part of the recvmsg operation, the processed
Expand Down Expand Up @@ -370,7 +370,6 @@ static struct proto_ops algif_aead_ops = {

.release = af_alg_release,
.sendmsg = aead_sendmsg,
.sendpage = af_alg_sendpage,
.recvmsg = aead_recvmsg,
.poll = af_alg_poll,
};
Expand Down Expand Up @@ -422,18 +421,6 @@ static int aead_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
return aead_sendmsg(sock, msg, size);
}

static ssize_t aead_sendpage_nokey(struct socket *sock, struct page *page,
int offset, size_t size, int flags)
{
int err;

err = aead_check_key(sock);
if (err)
return err;

return af_alg_sendpage(sock, page, offset, size, flags);
}

static int aead_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
size_t ignored, int flags)
{
Expand Down Expand Up @@ -461,7 +448,6 @@ static struct proto_ops algif_aead_ops_nokey = {

.release = af_alg_release,
.sendmsg = aead_sendmsg_nokey,
.sendpage = aead_sendpage_nokey,
.recvmsg = aead_recvmsg_nokey,
.poll = af_alg_poll,
};
Expand Down
2 changes: 0 additions & 2 deletions crypto/algif_rng.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ static struct proto_ops algif_rng_ops = {
.bind = sock_no_bind,
.accept = sock_no_accept,
.sendmsg = sock_no_sendmsg,
.sendpage = sock_no_sendpage,

.release = af_alg_release,
.recvmsg = rng_recvmsg,
Expand All @@ -192,7 +191,6 @@ static struct proto_ops __maybe_unused algif_rng_test_ops = {
.mmap = sock_no_mmap,
.bind = sock_no_bind,
.accept = sock_no_accept,
.sendpage = sock_no_sendpage,

.release = af_alg_release,
.recvmsg = rng_test_recvmsg,
Expand Down
14 changes: 0 additions & 14 deletions crypto/algif_skcipher.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ static struct proto_ops algif_skcipher_ops = {

.release = af_alg_release,
.sendmsg = skcipher_sendmsg,
.sendpage = af_alg_sendpage,
.recvmsg = skcipher_recvmsg,
.poll = af_alg_poll,
};
Expand Down Expand Up @@ -246,18 +245,6 @@ static int skcipher_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
return skcipher_sendmsg(sock, msg, size);
}

static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page,
int offset, size_t size, int flags)
{
int err;

err = skcipher_check_key(sock);
if (err)
return err;

return af_alg_sendpage(sock, page, offset, size, flags);
}

static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
size_t ignored, int flags)
{
Expand Down Expand Up @@ -285,7 +272,6 @@ static struct proto_ops algif_skcipher_ops_nokey = {

.release = af_alg_release,
.sendmsg = skcipher_sendmsg_nokey,
.sendpage = skcipher_sendpage_nokey,
.recvmsg = skcipher_recvmsg_nokey,
.poll = af_alg_poll,
};
Expand Down
2 changes: 0 additions & 2 deletions drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h
Original file line number Diff line number Diff line change
Expand Up @@ -569,8 +569,6 @@ int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int chtls_recvmsg(struct sock *sk, struct msghdr *msg,
size_t len, int flags, int *addr_len);
void chtls_splice_eof(struct socket *sock);
int chtls_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
int send_tx_flowc_wr(struct sock *sk, int compl,
u32 snd_nxt, u32 rcv_nxt);
void chtls_tcp_push(struct sock *sk, int flags);
Expand Down
14 changes: 0 additions & 14 deletions drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1246,20 +1246,6 @@ void chtls_splice_eof(struct socket *sock)
release_sock(sk);
}

int chtls_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
struct bio_vec bvec;

if (flags & MSG_SENDPAGE_NOTLAST)
msg.msg_flags |= MSG_MORE;

bvec_set_page(&bvec, page, size, offset);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
return chtls_sendmsg(sk, &msg, size);
}

static void chtls_select_window(struct sock *sk)
{
struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,6 @@ static void __init chtls_init_ulp_ops(void)
chtls_cpl_prot.shutdown = chtls_shutdown;
chtls_cpl_prot.sendmsg = chtls_sendmsg;
chtls_cpl_prot.splice_eof = chtls_splice_eof;
chtls_cpl_prot.sendpage = chtls_sendpage;
chtls_cpl_prot.recvmsg = chtls_recvmsg;
chtls_cpl_prot.setsockopt = chtls_setsockopt;
chtls_cpl_prot.getsockopt = chtls_getsockopt;
Expand Down
2 changes: 1 addition & 1 deletion fs/nfsd/vfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -936,7 +936,7 @@ nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,

/*
* Grab and keep cached pages associated with a file in the svc_rqst
* so that they can be passed to the network sendmsg/sendpage routines
* so that they can be passed to the network sendmsg routines
* directly. They will be released after the sending has completed.
*
* Return values: Number of bytes consumed, or -EIO if there are no
Expand Down
2 changes: 0 additions & 2 deletions include/crypto/if_alg.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,6 @@ void af_alg_wmem_wakeup(struct sock *sk);
int af_alg_wait_for_data(struct sock *sk, unsigned flags, unsigned min);
int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
unsigned int ivsize);
ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, int flags);
void af_alg_free_resources(struct af_alg_async_req *areq);
void af_alg_async_cb(void *data, int err);
__poll_t af_alg_poll(struct file *file, struct socket *sock,
Expand Down
8 changes: 0 additions & 8 deletions include/linux/net.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,6 @@ struct proto_ops {
size_t total_len, int flags);
int (*mmap) (struct file *file, struct socket *sock,
struct vm_area_struct * vma);
ssize_t (*sendpage) (struct socket *sock, struct page *page,
int offset, size_t size, int flags);
ssize_t (*splice_read)(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len, unsigned int flags);
void (*splice_eof)(struct socket *sock);
Expand All @@ -222,8 +220,6 @@ struct proto_ops {
sk_read_actor_t recv_actor);
/* This is different from read_sock(), it reads an entire skb at a time. */
int (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor);
int (*sendpage_locked)(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
size_t size);
int (*set_rcvlowat)(struct sock *sk, int val);
Expand Down Expand Up @@ -341,10 +337,6 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags);
int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
int kernel_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);

/* Routine returns the IP overhead imposed by a (caller-protected) socket. */
Expand Down
2 changes: 0 additions & 2 deletions include/net/inet_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ void __inet_accept(struct socket *sock, struct socket *newsock,
int inet_send_prepare(struct sock *sk);
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
void inet_splice_eof(struct socket *sock);
ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int inet_shutdown(struct socket *sock, int how);
Expand Down
6 changes: 0 additions & 6 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -1277,8 +1277,6 @@ struct proto {
size_t len);
int (*recvmsg)(struct sock *sk, struct msghdr *msg,
size_t len, int flags, int *addr_len);
int (*sendpage)(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
void (*splice_eof)(struct socket *sock);
int (*bind)(struct sock *sk,
struct sockaddr *addr, int addr_len);
Expand Down Expand Up @@ -1919,10 +1917,6 @@ int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len);
int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int);
int sock_no_mmap(struct file *file, struct socket *sock,
struct vm_area_struct *vma);
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
int offset, size_t size, int flags);

/*
* Functions to fill in entries in struct proto_ops when a protocol
Expand Down
4 changes: 0 additions & 4 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,10 +329,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
size_t size, struct ubuf_info *uarg);
void tcp_splice_eof(struct socket *sock);
int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
int tcp_wmem_schedule(struct sock *sk, int copy);
void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
Expand Down
1 change: 0 additions & 1 deletion net/appletalk/ddp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1929,7 +1929,6 @@ static const struct proto_ops atalk_dgram_ops = {
.sendmsg = atalk_sendmsg,
.recvmsg = atalk_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};

static struct notifier_block ddp_notifier = {
Expand Down
1 change: 0 additions & 1 deletion net/atm/pvc.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ static const struct proto_ops pvc_proto_ops = {
.sendmsg = vcc_sendmsg,
.recvmsg = vcc_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};


Expand Down
1 change: 0 additions & 1 deletion net/atm/svc.c
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,6 @@ static const struct proto_ops svc_proto_ops = {
.sendmsg = vcc_sendmsg,
.recvmsg = vcc_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};


Expand Down
1 change: 0 additions & 1 deletion net/ax25/af_ax25.c
Original file line number Diff line number Diff line change
Expand Up @@ -2022,7 +2022,6 @@ static const struct proto_ops ax25_proto_ops = {
.sendmsg = ax25_sendmsg,
.recvmsg = ax25_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};

/*
Expand Down
2 changes: 0 additions & 2 deletions net/caif/caif_socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -976,7 +976,6 @@ static const struct proto_ops caif_seqpacket_ops = {
.sendmsg = caif_seqpkt_sendmsg,
.recvmsg = caif_seqpkt_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};

static const struct proto_ops caif_stream_ops = {
Expand All @@ -996,7 +995,6 @@ static const struct proto_ops caif_stream_ops = {
.sendmsg = caif_stream_sendmsg,
.recvmsg = caif_stream_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};

/* This function is called when a socket is finally destroyed. */
Expand Down
1 change: 0 additions & 1 deletion net/can/bcm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1703,7 +1703,6 @@ static const struct proto_ops bcm_ops = {
.sendmsg = bcm_sendmsg,
.recvmsg = bcm_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};

static struct proto bcm_proto __read_mostly = {
Expand Down
1 change: 0 additions & 1 deletion net/can/isotp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1699,7 +1699,6 @@ static const struct proto_ops isotp_ops = {
.sendmsg = isotp_sendmsg,
.recvmsg = isotp_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};

static struct proto isotp_proto __read_mostly = {
Expand Down
1 change: 0 additions & 1 deletion net/can/j1939/socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -1306,7 +1306,6 @@ static const struct proto_ops j1939_ops = {
.sendmsg = j1939_sk_sendmsg,
.recvmsg = j1939_sk_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};

static struct proto j1939_proto __read_mostly = {
Expand Down
1 change: 0 additions & 1 deletion net/can/raw.c
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,6 @@ static const struct proto_ops raw_ops = {
.sendmsg = raw_sendmsg,
.recvmsg = raw_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};

static struct proto raw_proto __read_mostly = {
Expand Down
Loading

0 comments on commit dc97391

Please sign in to comment.