Skip to content

Commit

Permalink
afs: Use ITER_XARRAY for writing
Browse files Browse the repository at this point in the history
Use a single ITER_XARRAY iterator to describe the portion of a file to be
transmitted to the server rather than generating a series of small
ITER_BVEC iterators on the fly.  This will make it easier to implement AIO
in afs.

In theory we could maybe use one giant ITER_BVEC, but that means
potentially allocating a huge array of bio_vec structs (max 256 per page)
when in fact the pagecache already has a structure listing all the relevant
pages (radix_tree/xarray) that can be walked over.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-By: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
cc: linux-cachefs@redhat.com
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/158861251312.340223.17924900795425422532.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/159465828607.1377938.6903132788463419368.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/160588535018.3465195.14509994354240338307.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161118152415.1232039.6452879415814850025.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161161048194.2537118.13763612220937637316.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/161340411602.1303470.4661108879482218408.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/161539555629.286939.5241869986617154517.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/161653811456.2770958.7017388543246759245.stgit@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/161789095005.6155.6789055030327407928.stgit@warthog.procyon.org.uk/ # v6
  • Loading branch information
dhowells committed Apr 23, 2021
1 parent c450846 commit bd80d8a
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 218 deletions.
50 changes: 20 additions & 30 deletions fs/afs/fsclient.c
Original file line number Diff line number Diff line change
Expand Up @@ -1055,8 +1055,7 @@ static const struct afs_call_type afs_RXFSStoreData64 = {
/*
* store a set of pages to a very large file
*/
static void afs_fs_store_data64(struct afs_operation *op,
loff_t pos, loff_t size, loff_t i_size)
static void afs_fs_store_data64(struct afs_operation *op)
{
struct afs_vnode_param *vp = &op->file[0];
struct afs_call *call;
Expand All @@ -1071,7 +1070,7 @@ static void afs_fs_store_data64(struct afs_operation *op,
if (!call)
return afs_op_nomem(op);

call->send_pages = true;
call->write_iter = op->store.write_iter;

/* marshall the parameters */
bp = call->request;
Expand All @@ -1087,55 +1086,46 @@ static void afs_fs_store_data64(struct afs_operation *op,
*bp++ = 0; /* unix mode */
*bp++ = 0; /* segment size */

*bp++ = htonl(upper_32_bits(pos));
*bp++ = htonl(lower_32_bits(pos));
*bp++ = htonl(upper_32_bits(size));
*bp++ = htonl(lower_32_bits(size));
*bp++ = htonl(upper_32_bits(i_size));
*bp++ = htonl(lower_32_bits(i_size));
*bp++ = htonl(upper_32_bits(op->store.pos));
*bp++ = htonl(lower_32_bits(op->store.pos));
*bp++ = htonl(upper_32_bits(op->store.size));
*bp++ = htonl(lower_32_bits(op->store.size));
*bp++ = htonl(upper_32_bits(op->store.i_size));
*bp++ = htonl(lower_32_bits(op->store.i_size));

trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}

/*
* store a set of pages
* Write data to a file on the server.
*/
void afs_fs_store_data(struct afs_operation *op)
{
struct afs_vnode_param *vp = &op->file[0];
struct afs_call *call;
loff_t size, pos, i_size;
__be32 *bp;

_enter(",%x,{%llx:%llu},,",
key_serial(op->key), vp->fid.vid, vp->fid.vnode);

size = (loff_t)op->store.last_to - (loff_t)op->store.first_offset;
if (op->store.first != op->store.last)
size += (loff_t)(op->store.last - op->store.first) << PAGE_SHIFT;
pos = (loff_t)op->store.first << PAGE_SHIFT;
pos += op->store.first_offset;

i_size = i_size_read(&vp->vnode->vfs_inode);
if (pos + size > i_size)
i_size = size + pos;

_debug("size %llx, at %llx, i_size %llx",
(unsigned long long) size, (unsigned long long) pos,
(unsigned long long) i_size);
(unsigned long long)op->store.size,
(unsigned long long)op->store.pos,
(unsigned long long)op->store.i_size);

if (upper_32_bits(pos) || upper_32_bits(i_size) || upper_32_bits(size) ||
upper_32_bits(pos + size))
return afs_fs_store_data64(op, pos, size, i_size);
if (upper_32_bits(op->store.pos) ||
upper_32_bits(op->store.size) ||
upper_32_bits(op->store.i_size))
return afs_fs_store_data64(op);

call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData,
(4 + 6 + 3) * 4,
(21 + 6) * 4);
if (!call)
return afs_op_nomem(op);

call->send_pages = true;
call->write_iter = op->store.write_iter;

/* marshall the parameters */
bp = call->request;
Expand All @@ -1151,9 +1141,9 @@ void afs_fs_store_data(struct afs_operation *op)
*bp++ = 0; /* unix mode */
*bp++ = 0; /* segment size */

*bp++ = htonl(lower_32_bits(pos));
*bp++ = htonl(lower_32_bits(size));
*bp++ = htonl(lower_32_bits(i_size));
*bp++ = htonl(lower_32_bits(op->store.pos));
*bp++ = htonl(lower_32_bits(op->store.size));
*bp++ = htonl(lower_32_bits(op->store.i_size));

trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
Expand Down
15 changes: 8 additions & 7 deletions fs/afs/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ struct afs_call {
void *request; /* request data (first part) */
size_t iov_len; /* Size of *iter to be used */
struct iov_iter def_iter; /* Default buffer/data iterator */
struct iov_iter *write_iter; /* Iterator defining write to be made */
struct iov_iter *iter; /* Iterator currently in use */
union { /* Convenience for ->def_iter */
struct kvec kvec[1];
Expand All @@ -133,7 +134,6 @@ struct afs_call {
unsigned char unmarshall; /* unmarshalling phase */
unsigned char addr_ix; /* Address in ->alist */
bool drop_ref; /* T if need to drop ref for incoming call */
bool send_pages; /* T if data from mapping should be sent */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
bool upgrade; /* T to request service upgrade */
Expand Down Expand Up @@ -811,12 +811,13 @@ struct afs_operation {
afs_lock_type_t type;
} lock;
struct {
struct address_space *mapping; /* Pages being written from */
pgoff_t first; /* first page in mapping to deal with */
pgoff_t last; /* last page in mapping to deal with */
unsigned first_offset; /* offset into mapping[first] */
unsigned last_to; /* amount of mapping[last] */
bool laundering; /* Laundering page, PG_writeback not set */
struct iov_iter *write_iter;
loff_t pos;
loff_t size;
loff_t i_size;
pgoff_t first; /* first page in mapping to deal with */
pgoff_t last; /* last page in mapping to deal with */
bool laundering; /* Laundering page, PG_writeback not set */
} store;
struct {
struct iattr *attr;
Expand Down
103 changes: 15 additions & 88 deletions fs/afs/rxrpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -271,40 +271,6 @@ void afs_flat_call_destructor(struct afs_call *call)
call->buffer = NULL;
}

/* Upper bound on the number of pages gathered into a single bio_vec batch. */
#define AFS_BVEC_MAX 8

/*
 * Load the given bvec with the next few pages.
 *
 * Looks up at most AFS_BVEC_MAX pages of op->store.mapping, starting at page
 * index @first and not going beyond @last, fills in one bio_vec entry per
 * page in @bv and then points msg->msg_iter at the resulting array.
 *
 * @offset is the starting offset within the first page of this batch only;
 * every later page starts at offset 0.  All pages are sent up to PAGE_SIZE
 * except the page at index @last, which is cut off at op->store.last_to.
 *
 * MSG_MORE is set in msg->msg_flags and is cleared again only if this batch
 * reaches @last.
 *
 * NOTE(review): find_get_pages_contig() presumably returns the pages with a
 * reference held - nothing here drops one, so confirm that the caller does.
 */
static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
struct bio_vec *bv, pgoff_t first, pgoff_t last,
unsigned offset)
{
struct afs_operation *op = call->op;
struct page *pages[AFS_BVEC_MAX];
unsigned int nr, n, i, to, bytes = 0;

/* Number of pages left in [first, last], capped to the batch size. */
nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX);
n = find_get_pages_contig(op->store.mapping, first, nr, pages);
/* Every page in the requested range is expected to have been found. */
ASSERTCMP(n, ==, nr);

msg->msg_flags |= MSG_MORE;
for (i = 0; i < nr; i++) {
to = PAGE_SIZE;
if (first + i >= last) {
/* Final page of the whole store: trim it and flag end of data. */
to = op->store.last_to;
msg->msg_flags &= ~MSG_MORE;
}
bv[i].bv_page = pages[i];
bv[i].bv_len = to - offset;
bv[i].bv_offset = offset;
bytes += to - offset;
/* Only the first page of the batch starts part-way in. */
offset = 0;
}

/* Describe the nr segments, bytes long in total, as the data to send. */
iov_iter_bvec(&msg->msg_iter, WRITE, bv, nr, bytes);
}

/*
* Advance the AFS call state when the RxRPC call ends the transmit phase.
*/
Expand All @@ -317,42 +283,6 @@ static void afs_notify_end_request_tx(struct sock *sock,
afs_set_call_state(call, AFS_CALL_CL_REQUESTING, AFS_CALL_CL_AWAIT_REPLY);
}

/*
 * attach the data from a bunch of pages on an inode to a call
 *
 * Walks the page range op->store.first .. op->store.last in batches built by
 * afs_load_bvec(), handing each batch to rxrpc_kernel_send_data() on
 * call->rxcall.
 *
 * Returns the result of the last rxrpc_kernel_send_data() call; the loop
 * stops at the first negative result and that value is returned.
 *
 * Side effect: op->store.first_offset is reset to 0 after being read.
 */
static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
{
struct afs_operation *op = call->op;
struct bio_vec bv[AFS_BVEC_MAX];
unsigned int bytes, nr, loop, offset;
pgoff_t first = op->store.first, last = op->store.last;
int ret;

/* The intra-page start offset applies to the very first page only. */
offset = op->store.first_offset;
op->store.first_offset = 0;

do {
afs_load_bvec(call, msg, bv, first, last, offset);
trace_afs_send_pages(call, msg, first, last, offset);

offset = 0;
/* Snapshot the batch size before the send call is given the iterator. */
bytes = msg->msg_iter.count;
nr = msg->msg_iter.nr_segs;

ret = rxrpc_kernel_send_data(op->net->socket, call->rxcall, msg,
bytes, afs_notify_end_request_tx);
/* Release every page loaded for this batch, whether or not the send worked. */
for (loop = 0; loop < nr; loop++)
put_page(bv[loop].bv_page);
if (ret < 0)
break;

/* Move on to the page after the last one in this batch. */
first += nr;
} while (first <= last);

/* Reports the original first page, the last page and where the walk stopped. */
trace_afs_sent_pages(call, op->store.first, last, first, ret);
return ret;
}

/*
* Initiate a call and synchronously queue up the parameters for dispatch. Any
* error is stored into the call struct, which the caller must check for.
Expand Down Expand Up @@ -384,21 +314,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
* after the initial fixed part.
*/
tx_total_len = call->request_size;
if (call->send_pages) {
struct afs_operation *op = call->op;

if (op->store.last == op->store.first) {
tx_total_len += op->store.last_to - op->store.first_offset;
} else {
/* It looks mathematically like you should be able to
* combine the following lines with the ones above, but
* unsigned arithmetic is fun when it wraps...
*/
tx_total_len += PAGE_SIZE - op->store.first_offset;
tx_total_len += op->store.last_to;
tx_total_len += (op->store.last - op->store.first - 1) * PAGE_SIZE;
}
}
if (call->write_iter)
tx_total_len += iov_iter_count(call->write_iter);

/* If the call is going to be asynchronous, we need an extra ref for
* the call to hold itself so the caller need not hang on to its ref.
Expand Down Expand Up @@ -440,16 +357,26 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0);
msg.msg_flags = MSG_WAITALL | (call->write_iter ? MSG_MORE : 0);

ret = rxrpc_kernel_send_data(call->net->socket, rxcall,
&msg, call->request_size,
afs_notify_end_request_tx);
if (ret < 0)
goto error_do_abort;

if (call->send_pages) {
ret = afs_send_pages(call, &msg);
if (call->write_iter) {
msg.msg_iter = *call->write_iter;
msg.msg_flags &= ~MSG_MORE;
trace_afs_send_data(call, &msg);

ret = rxrpc_kernel_send_data(call->net->socket,
call->rxcall, &msg,
iov_iter_count(&msg.msg_iter),
afs_notify_end_request_tx);
*call->write_iter = msg.msg_iter;

trace_afs_sent_data(call, &msg, ret);
if (ret < 0)
goto error_do_abort;
}
Expand Down
Loading

0 comments on commit bd80d8a

Please sign in to comment.