
Commit ee4cdf7

dhowells authored and brauner committed on Sep 12, 2024
netfs: Speed up buffered reading
Improve the efficiency of buffered reads in a number of ways:

 (1) Overhaul the algorithm in general so that it's a lot more compact and
     split the read submission code between buffered and unbuffered
     versions.  The unbuffered version can be vastly simplified.

 (2) Read-result collection is handed off to a work queue rather than being
     done in the I/O thread.  Multiple subrequests can be processed
     simultaneously.

 (3) When a subrequest is collected, any folios it fully spans are
     collected and "spare" data on either side is donated to either the
     previous or the next subrequest in the sequence.

Notes:

 (*) Readahead expansion massively slows down fio, presumably because it
     causes a load of extra allocations, both folio and xarray, up front
     before RPC requests can be transmitted.

 (*) RDMA with cifs does appear to work, both with SIW and RXE.

 (*) PG_private_2-based reading and copy-to-cache is split out into its own
     file and altered to use folio_queue.  Note that the copy to the cache
     now creates a new write transaction against the cache and adds the
     folios to be copied into it.  This allows it to use part of the
     writeback I/O code.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2
Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 2e45b92 commit ee4cdf7

28 files changed: +2058 -470 lines
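
Note: the per-file hunks below all follow the same conversion.  A filesystem's
->issue_read() no longer reports its byte count through
netfs_subreq_terminated(); it accounts the bytes it moved into
subreq->transferred (and flags EOF) and then calls
netfs_read_subreq_terminated() with only an error code, leaving result
collection to netfslib's work queue.  As an illustration only, not code from
this commit, a minimal ->issue_read() for a hypothetical synchronous backend
might look like the sketch below; my_backend_read() is a made-up helper
standing in for the filesystem's own RPC:

        /* Illustrative sketch only.  my_backend_read() is hypothetical; the
         * netfs_* usage mirrors the pattern introduced by this commit.
         */
        static void myfs_issue_read(struct netfs_io_subrequest *subreq)
        {
                struct netfs_io_request *rreq = subreq->rreq;
                unsigned long long pos = subreq->start + subreq->transferred;
                ssize_t ret;

                /* Hypothetical backend call that fills subreq->io_iter from pos. */
                ret = my_backend_read(rreq->netfs_priv, pos, &subreq->io_iter);

                if (ret > 0) {
                        /* Account the bytes here; netfslib collects them later. */
                        subreq->transferred += ret;
                        if (pos + ret >= i_size_read(rreq->inode))
                                __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
                        ret = 0;
                }

                /* Hand just the error (0 on success) to the collector. */
                netfs_read_subreq_terminated(subreq, ret, false);
        }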
 

fs/9p/vfs_addr.c (+8 -3)

@@ -68,17 +68,22 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
 {
        struct netfs_io_request *rreq = subreq->rreq;
        struct p9_fid *fid = rreq->netfs_priv;
+       unsigned long long pos = subreq->start + subreq->transferred;
        int total, err;

-       total = p9_client_read(fid, subreq->start + subreq->transferred,
-                              &subreq->io_iter, &err);
+       total = p9_client_read(fid, pos, &subreq->io_iter, &err);

        /* if we just extended the file size, any portion not in
         * cache won't be on server and is zeroes */
        if (subreq->rreq->origin != NETFS_DIO_READ)
                __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+       if (pos + total >= i_size_read(rreq->inode))
+               __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);

-       netfs_subreq_terminated(subreq, err ?: total, false);
+       if (!err)
+               subreq->transferred += total;
+
+       netfs_read_subreq_terminated(subreq, err, false);
 }

 /**

fs/afs/file.c (+15 -6)

@@ -16,6 +16,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/netfs.h>
+#include <trace/events/netfs.h>
 #include "internal.h"

 static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
@@ -242,9 +243,10 @@ static void afs_fetch_data_notify(struct afs_operation *op)

        req->error = error;
        if (subreq) {
-               if (subreq->rreq->origin != NETFS_DIO_READ)
-                       __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
-               netfs_subreq_terminated(subreq, error ?: req->actual_len, false);
+               subreq->rreq->i_size = req->file_size;
+               if (req->pos + req->actual_len >= req->file_size)
+                       __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
+               netfs_read_subreq_terminated(subreq, error, false);
                req->subreq = NULL;
        } else if (req->done) {
                req->done(req);
@@ -262,6 +264,12 @@ static void afs_fetch_data_success(struct afs_operation *op)
        afs_fetch_data_notify(op);
 }

+static void afs_fetch_data_aborted(struct afs_operation *op)
+{
+       afs_check_for_remote_deletion(op);
+       afs_fetch_data_notify(op);
+}
+
 static void afs_fetch_data_put(struct afs_operation *op)
 {
        op->fetch.req->error = afs_op_error(op);
@@ -272,7 +280,7 @@ static const struct afs_operation_ops afs_fetch_data_operation = {
        .issue_afs_rpc = afs_fs_fetch_data,
        .issue_yfs_rpc = yfs_fs_fetch_data,
        .success = afs_fetch_data_success,
-       .aborted = afs_check_for_remote_deletion,
+       .aborted = afs_fetch_data_aborted,
        .failed = afs_fetch_data_notify,
        .put = afs_fetch_data_put,
 };
@@ -294,7 +302,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req)
        op = afs_alloc_operation(req->key, vnode->volume);
        if (IS_ERR(op)) {
                if (req->subreq)
-                       netfs_subreq_terminated(req->subreq, PTR_ERR(op), false);
+                       netfs_read_subreq_terminated(req->subreq, PTR_ERR(op), false);
                return PTR_ERR(op);
        }

@@ -313,7 +321,7 @@ static void afs_read_worker(struct work_struct *work)

        fsreq = afs_alloc_read(GFP_NOFS);
        if (!fsreq)
-               return netfs_subreq_terminated(subreq, -ENOMEM, false);
+               return netfs_read_subreq_terminated(subreq, -ENOMEM, false);

        fsreq->subreq = subreq;
        fsreq->pos = subreq->start + subreq->transferred;
@@ -322,6 +330,7 @@ static void afs_read_worker(struct work_struct *work)
        fsreq->vnode = vnode;
        fsreq->iter = &subreq->io_iter;

+       trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
        afs_fetch_data(fsreq->vnode, fsreq);
        afs_put_read(fsreq);
 }
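
Note: the afs completion path above also shows how the reworked API expects
EOF to be reported: the server's view of the file size is copied into
rreq->i_size and NETFS_SREQ_HIT_EOF is set when the fetch ran up to or past
it, so netfslib (rather than the filesystem) decides how much of the tail to
clear.  A condensed sketch of that check, assuming the caller has the request
position, the length actually fetched and the reported file size to hand (the
helper name is invented):

        /* Sketch of the EOF reporting seen in afs_fetch_data_notify() above.
         * pos, fetched and file_size are assumed to come from the RPC reply.
         */
        static void report_fetch_result(struct netfs_io_subrequest *subreq,
                                        loff_t pos, size_t fetched,
                                        loff_t file_size, int error)
        {
                subreq->rreq->i_size = file_size; /* refresh netfslib's size */
                if (pos + fetched >= file_size)
                        __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
                netfs_read_subreq_terminated(subreq, error, false);
        }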

fs/afs/fsclient.c (+7 -2)

@@ -304,6 +304,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
        struct afs_vnode_param *vp = &op->file[0];
        struct afs_read *req = op->fetch.req;
        const __be32 *bp;
+       size_t count_before;
        int ret;

        _enter("{%u,%zu,%zu/%llu}",
@@ -345,10 +346,14 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)

                /* extract the returned data */
        case 2:
-               _debug("extract data %zu/%llu",
-                      iov_iter_count(call->iter), req->actual_len);
+               count_before = call->iov_len;
+               _debug("extract data %zu/%llu", count_before, req->actual_len);

                ret = afs_extract_data(call, true);
+               if (req->subreq) {
+                       req->subreq->transferred += count_before - call->iov_len;
+                       netfs_read_subreq_progress(req->subreq, false);
+               }
                if (ret < 0)
                        return ret;

fs/afs/yfsclient.c (+7 -2)

@@ -355,6 +355,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
        struct afs_vnode_param *vp = &op->file[0];
        struct afs_read *req = op->fetch.req;
        const __be32 *bp;
+       size_t count_before;
        int ret;

        _enter("{%u,%zu, %zu/%llu}",
@@ -391,10 +392,14 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)

                /* extract the returned data */
        case 2:
-               _debug("extract data %zu/%llu",
-                      iov_iter_count(call->iter), req->actual_len);
+               count_before = call->iov_len;
+               _debug("extract data %zu/%llu", count_before, req->actual_len);

                ret = afs_extract_data(call, true);
+               if (req->subreq) {
+                       req->subreq->transferred += count_before - call->iov_len;
+                       netfs_read_subreq_progress(req->subreq, false);
+               }
                if (ret < 0)
                        return ret;
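
Note: both AFS delivery routines above report progress incrementally while the
reply is still being parsed: the iterator space consumed by each
afs_extract_data() pass is added to subreq->transferred and
netfs_read_subreq_progress() is called so the collector can unlock folios
early.  A hedged sketch of the same pattern for a hypothetical streaming
delivery step (struct my_call and extract_chunk() are invented stand-ins for
afs_call and afs_extract_data()):

        /* Illustrative only; mirrors the count_before accounting above. */
        struct my_call {
                struct netfs_io_subrequest *subreq;
                size_t iov_len; /* bytes still to extract from the reply */
        };

        static int deliver_reply_chunk(struct my_call *call)
        {
                size_t count_before = call->iov_len;
                int ret = extract_chunk(call); /* may consume part of the reply */

                if (call->subreq) {
                        /* Credit what was consumed and nudge the read collector. */
                        call->subreq->transferred += count_before - call->iov_len;
                        netfs_read_subreq_progress(call->subreq, false);
                }
                return ret;
        }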

fs/ceph/addr.c (+46 -30)

@@ -13,6 +13,7 @@
 #include <linux/iversion.h>
 #include <linux/ktime.h>
 #include <linux/netfs.h>
+#include <trace/events/netfs.h>

 #include "super.h"
 #include "mds_client.h"
@@ -205,21 +206,6 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
        }
 }

-static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
-{
-       struct inode *inode = subreq->rreq->inode;
-       struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
-       struct ceph_inode_info *ci = ceph_inode(inode);
-       u64 objno, objoff;
-       u32 xlen;
-
-       /* Truncate the extent at the end of the current block */
-       ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
-                                     &objno, &objoff, &xlen);
-       subreq->len = min(xlen, fsc->mount_options->rsize);
-       return true;
-}
-
 static void finish_netfs_read(struct ceph_osd_request *req)
 {
        struct inode *inode = req->r_inode;
@@ -264,7 +250,12 @@ static void finish_netfs_read(struct ceph_osd_request *req)
                                   calc_pages_for(osd_data->alignment,
                                        osd_data->length), false);
        }
-       netfs_subreq_terminated(subreq, err, false);
+       if (err > 0) {
+               subreq->transferred = err;
+               err = 0;
+       }
+       trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress);
+       netfs_read_subreq_terminated(subreq, err, false);
        iput(req->r_inode);
        ceph_dec_osd_stopping_blocker(fsc->mdsc);
 }
@@ -278,7 +269,6 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
        struct ceph_mds_request *req;
        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct iov_iter iter;
        ssize_t err = 0;
        size_t len;
        int mode;
@@ -301,6 +291,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
        req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA);
        req->r_num_caps = 2;

+       trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
        err = ceph_mdsc_do_request(mdsc, NULL, req);
        if (err < 0)
                goto out;
@@ -314,17 +305,36 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
        }

        len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len);
-       iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
-       err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter);
-       if (err == 0)
+       err = copy_to_iter(iinfo->inline_data + subreq->start, len, &subreq->io_iter);
+       if (err == 0) {
                err = -EFAULT;
+       } else {
+               subreq->transferred += err;
+               err = 0;
+       }

        ceph_mdsc_put_request(req);
 out:
-       netfs_subreq_terminated(subreq, err, false);
+       netfs_read_subreq_terminated(subreq, err, false);
        return true;
 }

+static int ceph_netfs_prepare_read(struct netfs_io_subrequest *subreq)
+{
+       struct netfs_io_request *rreq = subreq->rreq;
+       struct inode *inode = rreq->inode;
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
+       u64 objno, objoff;
+       u32 xlen;
+
+       /* Truncate the extent at the end of the current block */
+       ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
+                                     &objno, &objoff, &xlen);
+       rreq->io_streams[0].sreq_max_len = umin(xlen, fsc->mount_options->rsize);
+       return 0;
+}
+
 static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
 {
        struct netfs_io_request *rreq = subreq->rreq;
@@ -334,9 +344,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
        struct ceph_client *cl = fsc->client;
        struct ceph_osd_request *req = NULL;
        struct ceph_vino vino = ceph_vino(inode);
-       struct iov_iter iter;
-       int err = 0;
-       u64 len = subreq->len;
+       int err;
+       u64 len;
        bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
        u64 off = subreq->start;
        int extent_cnt;
@@ -349,6 +358,12 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
        if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
                return;

+       // TODO: This rounding here is slightly dodgy. It *should* work, for
+       // now, as the cache only deals in blocks that are a multiple of
+       // PAGE_SIZE and fscrypt blocks are at most PAGE_SIZE. What needs to
+       // happen is for the fscrypt driving to be moved into netfslib and the
+       // data in the cache also to be stored encrypted.
+       len = subreq->len;
        ceph_fscrypt_adjust_off_and_len(inode, &off, &len);

        req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
@@ -371,8 +386,6 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
        doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n",
              ceph_vinop(inode), subreq->start, subreq->len, len);

-       iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
-
        /*
         * FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for
         * encrypted inodes. We'd need infrastructure that handles an iov_iter
@@ -384,7 +397,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
                struct page **pages;
                size_t page_off;

-               err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
+               err = iov_iter_get_pages_alloc2(&subreq->io_iter, &pages, len, &page_off);
                if (err < 0) {
                        doutc(cl, "%llx.%llx failed to allocate pages, %d\n",
                              ceph_vinop(inode), err);
@@ -399,7 +412,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
                osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false,
                                                 false);
        } else {
-               osd_req_op_extent_osd_iter(req, 0, &iter);
+               osd_req_op_extent_osd_iter(req, 0, &subreq->io_iter);
        }
        if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
                err = -EIO;
@@ -410,17 +423,19 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
        req->r_inode = inode;
        ihold(inode);

+       trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
        ceph_osdc_start_request(req->r_osdc, req);
 out:
        ceph_osdc_put_request(req);
        if (err)
-               netfs_subreq_terminated(subreq, err, false);
+               netfs_read_subreq_terminated(subreq, err, false);
        doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err);
 }

 static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
 {
        struct inode *inode = rreq->inode;
+       struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
        struct ceph_client *cl = ceph_inode_to_client(inode);
        int got = 0, want = CEPH_CAP_FILE_CACHE;
        struct ceph_netfs_request_data *priv;
@@ -472,6 +487,7 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)

        priv->caps = got;
        rreq->netfs_priv = priv;
+       rreq->io_streams[0].sreq_max_len = fsc->mount_options->rsize;

 out:
        if (ret < 0)
@@ -496,9 +512,9 @@ static void ceph_netfs_free_request(struct netfs_io_request *rreq)
 const struct netfs_request_ops ceph_netfs_ops = {
        .init_request = ceph_init_request,
        .free_request = ceph_netfs_free_request,
+       .prepare_read = ceph_netfs_prepare_read,
        .issue_read = ceph_netfs_issue_read,
        .expand_readahead = ceph_netfs_expand_readahead,
-       .clamp_length = ceph_netfs_clamp_length,
        .check_write_begin = ceph_netfs_check_write_begin,
 };
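
Note: the ceph conversion above replaces the old ->clamp_length() hook, which
trimmed subreq->len directly and returned a bool, with ->prepare_read(), which
publishes the per-subrequest ceiling through rreq->io_streams[0].sreq_max_len
and returns an error code; ceph_init_request() likewise seeds that ceiling
with the mount's rsize.  A minimal hedged sketch of the same idea for a
hypothetical filesystem whose backend caps reads at a fixed RPC size
(my_max_rpc_size() is invented):

        /* Sketch only; the sreq_max_len usage follows the ceph hunks above. */
        static int myfs_prepare_read(struct netfs_io_subrequest *subreq)
        {
                struct netfs_io_request *rreq = subreq->rreq;

                /* Tell netfslib how large this subrequest may grow, instead
                 * of trimming subreq->len as ->clamp_length() used to do.
                 */
                rreq->io_streams[0].sreq_max_len = my_max_rpc_size(rreq->inode);
                return 0;
        }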

fs/netfs/Makefile (+3 -1)

@@ -5,12 +5,14 @@ netfs-y := \
        buffered_write.o \
        direct_read.o \
        direct_write.o \
-       io.o \
        iterator.o \
        locking.o \
        main.o \
        misc.o \
        objects.o \
+       read_collect.o \
+       read_pgpriv2.o \
+       read_retry.o \
        write_collect.o \
        write_issue.o
