Disk I/O optimization #1765

Merged · 2 commits · Aug 31, 2022
54 changes: 31 additions & 23 deletions src/kernel/pagecache.c
@@ -250,21 +250,15 @@ static boolean realloc_pagelocked(pagecache pc, pagecache_page pp)
     return true;
 }
 
-/* If the refcount argument is true, this function must be called with the page state lock held. */
-static void pagecache_add_sgb(pagecache pc, pagecache_page pp, sg_list sg, boolean refcount)
+static sg_buf pagecache_add_sgb(pagecache pc, pagecache_page pp, sg_list sg)
 {
     sg_buf sgb = sg_list_tail_add(sg, cache_pagesize(pc));
     assert(sgb != INVALID_ADDRESS);
     sgb->buf = pp->kvirt;
     sgb->size = cache_pagesize(pc);
     sgb->offset = 0;
-    if (refcount) {
-        sgb->refcount = &pp->read_refcount;
-        if (fetch_and_add(&pp->read_refcount.c, 1) == 0)
-            pp->refcount++;
-    } else {
-        sgb->refcount = 0;
-    }
+    sgb->refcount = 0;
+    return sgb;
 }
 
 /* Returns true if the page is already cached (or is being fetched from disk), false if a disk read
@@ -360,7 +354,7 @@ static boolean touch_or_fill_page_nodelocked(pagecache_node pn, pagecache_page p
         pagecache_debug(" pc %p, pp %p, r %R, reading...\n", pc, pp, r);
         sg_list sg = allocate_sg_list();
         assert(sg != INVALID_ADDRESS);
-        pagecache_add_sgb(pc, pp, sg, false);
+        pagecache_add_sgb(pc, pp, sg);
         apply(pn->fs_read, sg, r,
               closure(pc->h, pagecache_read_page_complete, pc, pp, sg));
     }
@@ -845,7 +839,7 @@ static void commit_dirty_node_complete(pagecache_node pn, status_handler complet
 }
 
 #ifdef KERNEL
-#define COMMIT_LIMIT (128*KB)
+#define COMMIT_LIMIT 32 /* number of SG buffers */
 #else
 #define COMMIT_LIMIT infinity
 #endif
@@ -883,21 +877,28 @@ closure_function(3, 1, void, pagecache_commit_dirty_ranges,
     u64 page_count = 0;
     pagecache_page pp = first_page;
     range r = *rp;
+    sg_buf sgb = 0;
 
     do {
         u64 page_offset = start & MASK(pc->page_order);
         u64 len = pad(MIN(cache_pagesize(pc) - page_offset, r.end - start),
                       U64_FROM_BIT(pv->block_order));
-        sg_buf sgb = sg_list_tail_add(sg, len);
-        if (sgb == INVALID_ADDRESS) {
-            msg_err("sgbuf alloc fail\n");
-            r.end = start;
-            break;
+        if (sgb && (sgb->buf + sgb->size == pp->kvirt)) {
+            sgb->size += len;
+            sg->count += len;
+        } else {
+            sgb = sg_list_tail_add(sg, len);
+            if (sgb == INVALID_ADDRESS) {
+                msg_err("sgbuf alloc fail\n");
+                r.end = start;
+                break;
+            }
+            sgb->buf = pp->kvirt + page_offset;
+            sgb->offset = 0;
+            sgb->size = len;
+            sgb->refcount = 0;
+            committing++;
         }
-        sgb->buf = pp->kvirt + page_offset;
-        sgb->offset = 0;
-        sgb->size = len;
-        sgb->refcount = 0;
         pagecache_lock_state(pc);
         /* Reserve the page, unless it is in DIRTY state (in which case it has been reserved
          * when switching to DIRTY state). */
@@ -909,7 +910,7 @@
         page_count++;
         start += len;
         pp = (pagecache_page)rbnode_get_next((rbnode)pp);
-        if (start - r.start >= COMMIT_LIMIT && start < r.end) {
+        if (committing >= COMMIT_LIMIT && start < r.end) {
            r.end = start;
            break;
        }
@@ -922,7 +923,6 @@ closure_function(3, 1, void, pagecache_commit_dirty_ranges,
            break;
        apply(pn->fs_write, sg, r,
              closure(pc->h, pagecache_commit_complete, pc, first_page, page_count, sg, apply_merge(m)));
-       committing += range_span(r);
    }
    if (committing == 0)
        s = timm("result", "%s: unable to perform i/o", __func__);
@@ -1039,6 +1039,7 @@ closure_function(5, 1, void, pagecache_node_fetch_complete,
        change_page_state_locked(pc, pp,
                                 is_ok(s) ? PAGECACHE_PAGESTATE_NEW : PAGECACHE_PAGESTATE_ALLOC);
        pagecache_page_queue_completions_locked(pc, pp, s);
+       pagecache_page_release_locked(pc, pp);
        pp = (pagecache_page)rbnode_get_next((rbnode)pp);
    }
    pagecache_unlock_state(pc);
@@ -1079,6 +1080,7 @@ static void pagecache_node_fetch_internal(pagecache_node pn, range q, pp_handler
    sg_list read_sg = 0;
    pagecache_page read_pp = 0;
    range read_r;
+   sg_buf sgb = 0;
    pagecache_lock_state(pc);
    for (u64 pi = k.state_offset; pi < end; pi++) {
        if (pp == INVALID_ADDRESS || page_offset(pp) > pi) {
@@ -1114,7 +1116,13 @@
            read_pp = pp;
            read_r = range_lshift(irangel(page_offset(pp), 0), pc->page_order);
        }
-       pagecache_add_sgb(pc, pp, read_sg, true);
+       if (sgb && (sgb->buf + sgb->size == pp->kvirt)) {
+           sgb->size += cache_pagesize(pc);
+           read_sg->count += cache_pagesize(pc);
+       } else {
+           sgb = pagecache_add_sgb(pc, pp, read_sg);
+       }
+       pp->refcount++;
        read_r.end += cache_pagesize(pc);
    }
    if (ph)
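The pagecache changes above share one idea: when the next page's kernel mapping starts exactly where the tail SG buffer ends, grow that buffer instead of appending a new one, so a run of contiguous pages becomes a single large I/O vector on both the write (commit) and read (fetch) paths. COMMIT_LIMIT accordingly now counts SG buffers rather than bytes, since one buffer may cover many pages. Below is a minimal standalone sketch of the coalescing step; the `sglist`/`sgbuf` types and the `sg_add_page()` helper are simplified stand-ins, not the kernel's actual sg_list API.

```c
/* Standalone sketch of the SG-buffer coalescing done in
 * pagecache_commit_dirty_ranges() and pagecache_node_fetch_internal():
 * if the next page is virtually contiguous with the tail buffer, grow
 * the tail instead of appending a new entry. */
#include <stdio.h>
#include <stdlib.h>

#define PAGESIZE 4096
#define MAX_SGB  32

struct sgbuf {
    void *buf;          /* start of contiguous data */
    size_t size;        /* length in bytes */
};

struct sglist {
    struct sgbuf entries[MAX_SGB];
    int count;          /* entries in use */
};

/* Add one page, coalescing with the tail buffer when the page
 * immediately follows it in virtual memory. Returns 0 on success. */
static int sg_add_page(struct sglist *sg, void *page)
{
    if (sg->count > 0) {
        struct sgbuf *tail = &sg->entries[sg->count - 1];
        if ((char *)tail->buf + tail->size == (char *)page) {
            tail->size += PAGESIZE;    /* extend tail: no new entry */
            return 0;
        }
    }
    if (sg->count == MAX_SGB)          /* cf. the new COMMIT_LIMIT */
        return -1;
    sg->entries[sg->count].buf = page;
    sg->entries[sg->count].size = PAGESIZE;
    sg->count++;
    return 0;
}

int main(void)
{
    /* Eight cached pages backed by one contiguous mapping plus one
     * page elsewhere: 2 SG entries instead of 9. */
    char *run = malloc(8 * PAGESIZE);
    char *lone = malloc(PAGESIZE);
    struct sglist sg = {0};
    for (int i = 0; i < 8; i++)
        sg_add_page(&sg, run + i * PAGESIZE);
    sg_add_page(&sg, lone);
    printf("%d entries\n", sg.count);   /* typically prints "2 entries" */
    free(run);
    free(lone);
    return 0;
}
```

Fewer SG entries per request generally means less per-segment processing in the storage driver, which is where the disk I/O win comes from.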
61 changes: 45 additions & 16 deletions src/tfs/tfs.c
@@ -584,8 +584,8 @@ static fs_status filesystem_truncate_locked(filesystem fs, fsfile f, u64 len)
 
    The life of an extent depends on a particular allocation of contiguous
    storage space. The extent is tied to this allocated area (nominally
-   page size). Only the extent data length may be updated; the file
-   offset, block start and allocation size are immutable. As an
+   page size). Only the extent data length and allocation size may be
+   updated; the file offset and block start are immutable. As an
    optimization, adjacent extents on the disk could be joined into
    larger extents with only a meta update.
 
@@ -603,6 +603,12 @@ static fs_status create_extent(filesystem fs, range blocks, boolean uninited, ex
        return FS_STATUS_NOSPACE;
 
    u64 start_block = filesystem_allocate_storage(fs, nblocks);
+   while (start_block == u64_from_pointer(INVALID_ADDRESS)) {
+       if (nblocks <= (MIN_EXTENT_ALLOC_SIZE >> fs->blocksize_order))
+           break;
+       nblocks /= 2;
+       start_block = filesystem_allocate_storage(fs, nblocks);
+   }
    if (start_block == u64_from_pointer(INVALID_ADDRESS))
        return FS_STATUS_NOSPACE;
 
@@ -687,19 +693,12 @@ static fs_status add_extents(filesystem fs, range i, rangemap rm)
 {
    extent ex;
    fs_status fss;
-   while (range_span(i) >= MAX_EXTENT_SIZE) {
-       range r = {.start = i.start, .end = i.start + MAX_EXTENT_SIZE};
-       fss = create_extent(fs, r, true, &ex);
-       if (fss != FS_STATUS_OK)
-           return fss;
-       assert(rangemap_insert(rm, &ex->node));
-       i.start += MAX_EXTENT_SIZE;
-   }
-   if (range_span(i)) {
+   while (range_span(i)) {
        fss = create_extent(fs, i, true, &ex);
        if (fss != FS_STATUS_OK)
            return fss;
        assert(rangemap_insert(rm, &ex->node));
+       i.start = ex->node.r.end;
    }
    return FS_STATUS_OK;
 }
@@ -836,13 +835,12 @@ static u64 write_extent(fsfile f, extent ex, sg_list sg, range blocks, merge m)
 
 static fs_status fill_gap(fsfile f, sg_list sg, range blocks, merge m, u64 *edge)
 {
-   blocks = irangel(blocks.start, MIN(MAX_EXTENT_SIZE >> f->fs->blocksize_order,
-                                      range_span(blocks)));
    tfs_debug(" %s: writing new extent blocks %R\n", __func__, blocks);
    extent ex;
    fs_status fss = create_extent(f->fs, blocks, false, &ex);
    if (fss != FS_STATUS_OK)
        return fss;
+   blocks = ex->node.r;
    fss = add_extent_to_file(f, ex);
    if (fss != FS_STATUS_OK) {
        destroy_extent(f->fs, ex);
@@ -854,12 +852,11 @@ static fs_status fill_gap(fsfile f, sg_list sg, range blocks, merge m, u64 *edge
    return FS_STATUS_OK;
 }
 
-static fs_status update_extent_length(fsfile f, extent ex, u64 new_length)
+static fs_status update_extent(fsfile f, extent ex, symbol l, u64 val)
 {
    if (f->md) {
        assert(ex->md);
-       value v = value_from_u64(f->fs->h, new_length);
-       symbol l = sym(length);
+       value v = value_from_u64(f->fs->h, val);
        fs_status s = filesystem_write_eav(f->fs, ex->md, l, v);
        if (s != FS_STATUS_OK)
            return s;
@@ -869,6 +866,24 @@ static fs_status update_extent_length(fsfile f, extent ex, u64 new_length)
        set(ex->md, l, v);
        f->status |= FSF_DIRTY_DATASYNC;
    }
+   return FS_STATUS_OK;
+}
+
+static fs_status update_extent_allocated(fsfile f, extent ex, u64 allocated)
+{
+   fs_status s = update_extent(f, ex, sym(allocated), allocated);
+   if (s != FS_STATUS_OK)
+       return s;
+   tfs_debug(" %s: was 0x%lx, now 0x%lx\n", __func__, ex->allocated, allocated);
+   ex->allocated = allocated;
+   return FS_STATUS_OK;
+}
+
+static fs_status update_extent_length(fsfile f, extent ex, u64 new_length)
+{
+   fs_status s = update_extent(f, ex, sym(length), new_length);
+   if (s != FS_STATUS_OK)
+       return s;
 
    /* TODO cheating; should be reinsert - update rangemap interface? */
    tfs_debug(" %s: was %R\n", __func__, ex->node.r);
@@ -881,6 +896,20 @@ static fs_status extend(fsfile f, extent ex, sg_list sg, range blocks, merge m,
 {
    u64 free = ex->allocated - range_span(ex->node.r);
    range r = irangel(ex->node.r.end, free);
+   if (blocks.end > r.end) {
+       filesystem fs = f->fs;
+       range new = irange(ex->start_block + ex->allocated,
+                          MIN(ex->start_block + blocks.end, fs->size >> fs->blocksize_order));
+       if (range_span(new) && filesystem_reserve_storage(fs, new)) {
+           fs_status s = update_extent_allocated(f, ex, ex->allocated + range_span(new));
+           if (s == FS_STATUS_OK) {
+               r.end = blocks.end;
+               free = r.end - ex->node.r.end;
+           } else {
+               filesystem_free_storage(fs, new);
+           }
+       }
+   }
    range i = range_intersection(r, blocks);
    tfs_debug(" %s: node %R, free 0x%lx (%R), i %R\n", __func__, ex->node.r, free, r, i);
    if (range_span(i) == 0) {
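In extend(), the new block tries to grow the extent's on-disk allocation in place before resorting to a new extent: if the blocks immediately following the allocated region can be reserved, `allocated` is enlarged and persisted via update_extent_allocated(), and on a metadata failure the reservation is rolled back with filesystem_free_storage(). A condensed standalone sketch of that decision follows; the toy bitmap, `reserve_storage()` helper, and `struct extent` fields are illustrative stand-ins, not tfs types.

```c
/* Sketch of in-place extent growth as in extend(): before creating a
 * new extent for an overflowing write, try to reserve the blocks that
 * directly follow the extent's current allocation. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FS_BLOCKS 1024

static bool used[FS_BLOCKS];   /* toy storage bitmap, one flag per block */

/* Reserve [start, end) only if every block in it is currently free. */
static bool reserve_storage(uint64_t start, uint64_t end)
{
    if (end > FS_BLOCKS)
        return false;
    for (uint64_t b = start; b < end; b++)
        if (used[b])
            return false;
    for (uint64_t b = start; b < end; b++)
        used[b] = true;
    return true;
}

struct extent {
    uint64_t start_block;  /* first storage block of the allocation */
    uint64_t length;       /* blocks currently holding file data */
    uint64_t allocated;    /* blocks reserved on disk for this extent */
};

/* Grow ex->allocated so a write ending `want` blocks into the extent
 * fits, if the blocks right after the allocation are free; otherwise
 * leave the extent alone and let the caller fall back to creating a
 * new extent for the overflow. */
static void try_grow_in_place(struct extent *ex, uint64_t want)
{
    if (want <= ex->allocated)
        return;             /* already fits in the reserved area */
    uint64_t from = ex->start_block + ex->allocated;
    uint64_t to = ex->start_block + want;
    if (reserve_storage(from, to))
        ex->allocated = want;   /* tfs would also persist this via
                                   update_extent_allocated() */
}

int main(void)
{
    struct extent ex = { .start_block = 100, .length = 8, .allocated = 16 };
    for (uint64_t b = 100; b < 116; b++)
        used[b] = true;         /* mark the extent's current allocation */
    try_grow_in_place(&ex, 64); /* blocks 116..163 are free, so it grows */
    printf("allocated = %llu blocks\n", (unsigned long long)ex.allocated);
    return 0;
}
```

Growing in place keeps a sequentially written file in a single extent, so later reads need fewer metadata lookups and can issue larger contiguous transfers.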
2 changes: 1 addition & 1 deletion src/tfs/tfs.h
@@ -18,7 +18,7 @@ pagecache_node fsfile_get_cachenode(fsfile f);
 extern io_status_handler ignore_io_status;
 
 #define MIN_EXTENT_SIZE PAGESIZE
-#define MAX_EXTENT_SIZE (1 * MB)
+#define MIN_EXTENT_ALLOC_SIZE (1 * MB)
 
 boolean filesystem_probe(u8 *first_sector, u8 *uuid, char *label);
 const char *filesystem_get_label(filesystem fs);
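With MAX_EXTENT_SIZE gone, extents are no longer carved into 1 MB pieces up front; instead create_extent() asks for the whole range, and MIN_EXTENT_ALLOC_SIZE becomes the floor of a back-off loop that halves the request until the allocator can satisfy it, while the reworked add_extents() simply loops until the range is covered by whatever sizes it gets. A minimal sketch of the back-off, with `allocate_blocks()` as an assumed stand-in for filesystem_allocate_storage():

```c
/* Sketch of the allocation back-off in create_extent(): request the
 * full extent, halving on failure, but never below the 1 MB
 * MIN_EXTENT_ALLOC_SIZE floor. */
#include <stdint.h>
#include <stdio.h>

#define KB                    1024ULL
#define MB                    (1024 * KB)
#define BLOCKSIZE_ORDER       9   /* 512-byte blocks */
#define MIN_EXTENT_ALLOC_SIZE (1 * MB)
#define INVALID_BLOCK         UINT64_MAX

/* Toy allocator: pretend only runs of at most 4 MB are available. */
static uint64_t allocate_blocks(uint64_t nblocks)
{
    return (nblocks << BLOCKSIZE_ORDER) > 4 * MB ? INVALID_BLOCK : 0;
}

/* Returns the starting block, halving the request until it fits or
 * hits the minimum extent allocation size. */
static uint64_t allocate_extent(uint64_t *nblocks)
{
    uint64_t start = allocate_blocks(*nblocks);
    while (start == INVALID_BLOCK) {
        if (*nblocks <= (MIN_EXTENT_ALLOC_SIZE >> BLOCKSIZE_ORDER))
            break;  /* give up: caller reports FS_STATUS_NOSPACE */
        *nblocks /= 2;
        start = allocate_blocks(*nblocks);
    }
    return start;
}

int main(void)
{
    uint64_t nblocks = (16 * MB) >> BLOCKSIZE_ORDER;  /* ask for 16 MB */
    uint64_t start = allocate_extent(&nblocks);
    if (start != INVALID_BLOCK)  /* settles on 4 MB: "got 8192 blocks" */
        printf("got %llu blocks\n", (unsigned long long)nblocks);
    return 0;
}
```

The floor keeps a fragmented disk from degenerating into a flood of tiny extents, while still letting allocation succeed when no large contiguous run is free.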