Skip to content

Commit

Permalink
Changed arc evict to prioritize unloaded spas
Browse files Browse the repository at this point in the history
When there are active async flushes, then the eviction
thread can focus exclusively on buffers belonging to any
spa that is being flushed.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.

Signed-off-by: Don Brady <don.brady@klarasystems.com>
  • Loading branch information
don-brady committed Aug 23, 2024
1 parent 52b68cd commit 3d835dd
Showing 1 changed file with 64 additions and 12 deletions.
76 changes: 64 additions & 12 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,8 @@ typedef struct arc_async_flush {
list_node_t af_node;
} arc_async_flush_t;

/*
 * Forward declaration; the definition appears later in this file. Fills
 * spa_list with up to list_len guids taken from the active async flush
 * list and returns how many entries were written.
 */
static unsigned int arc_async_flush_init_spa_list(uint64_t spa_list[],
unsigned int list_len);

/*
* Level 2 ARC
Expand Down Expand Up @@ -3884,9 +3886,20 @@ arc_set_need_free(void)
}
}

/*
 * Report whether spa_guid appears in the first spa_cnt entries of spa_list.
 *
 * Returns B_TRUE on the first match and B_FALSE when no entry matches,
 * which includes the case where spa_cnt is zero (spa_list is then never
 * dereferenced).
 */
static boolean_t
arc_spa_is_list_member(uint64_t spa_guid, uint64_t spa_list[],
    unsigned int spa_cnt)
{
	/* Index type matches spa_cnt to avoid a signed/unsigned comparison. */
	for (unsigned int i = 0; i < spa_cnt; i++) {
		if (spa_list[i] == spa_guid)
			return (B_TRUE);
	}
	return (B_FALSE);
}

static uint64_t
arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
uint64_t spa, uint64_t bytes)
uint64_t bytes, uint64_t spa_list[], unsigned int spa_cnt)
{
multilist_sublist_t *mls;
uint64_t bytes_evicted = 0, real_evicted = 0;
Expand Down Expand Up @@ -3928,8 +3941,13 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
if (hdr->b_spa == 0)
continue;

/* we're only interested in evicting buffers of a certain spa */
if (spa != 0 && hdr->b_spa != spa) {
/*
* Check if we're only interested in evicting buffers from
* a specific list of spas. This would typically be from
* spas that are being unloaded.
*/
if (spa_cnt > 0 &&
!arc_spa_is_list_member(hdr->b_spa, spa_list, spa_cnt)) {
ARCSTAT_BUMP(arcstat_evict_skip);
continue;
}
Expand Down Expand Up @@ -4065,8 +4083,8 @@ arc_state_free_markers(arc_buf_hdr_t **markers, int count)
* the given arc state; which is used by arc_flush().
*/
static uint64_t
arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,
uint64_t bytes)
arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t bytes,
uint64_t spa_list[], unsigned int spa_cnt)
{
uint64_t total_evicted = 0;
multilist_t *ml = &state->arcs_list[type];
Expand Down Expand Up @@ -4121,7 +4139,8 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,
break;

bytes_evicted = arc_evict_state_impl(ml, sublist_idx,
markers[sublist_idx], spa, bytes_remaining);
markers[sublist_idx], bytes_remaining, spa_list,
spa_cnt);

scan_evicted += bytes_evicted;
total_evicted += bytes_evicted;
Expand Down Expand Up @@ -4186,9 +4205,11 @@ arc_flush_state(arc_state_t *state, uint64_t spa, arc_buf_contents_t type,
boolean_t retry)
{
uint64_t evicted = 0;
uint64_t spa_list[1] = {spa};

while (zfs_refcount_count(&state->arcs_esize[type]) != 0) {
evicted += arc_evict_state(state, type, spa, ARC_EVICT_ALL);
evicted += arc_evict_state(state, type, ARC_EVICT_ALL,
spa_list, spa == 0 ? 0 : 1);

if (!retry)
break;
Expand All @@ -4212,7 +4233,15 @@ arc_evict_impl(arc_state_t *state, arc_buf_contents_t type, int64_t bytes)
if (bytes > 0 && zfs_refcount_count(&state->arcs_esize[type]) > 0) {
delta = MIN(zfs_refcount_count(&state->arcs_esize[type]),
bytes);
return (arc_evict_state(state, type, 0, delta));
/*
* Create a list of guids from any active ARC async flushes.
* The underlying arc_evict_state() function will target
* only spa guids from this list when it is not empty.
*/
uint64_t spa_list[16];
unsigned int spa_cnt =
arc_async_flush_init_spa_list(spa_list, 16);
return (arc_evict_state(state, type, delta, spa_list, spa_cnt));
}

return (0);
Expand Down Expand Up @@ -4516,8 +4545,8 @@ arc_flush_async(spa_t *spa)
* unlikely, but if we couldn't dispatch then use an inline flush
*/
if (tid == TASKQID_INVALID) {
arc_async_flush_remove(spa_guid, TASKQID_INVALID);
arc_flush_impl(spa_guid, B_FALSE);
arc_async_flush_remove(spa_guid, TASKQID_INVALID);
}
}

Expand All @@ -4539,6 +4568,30 @@ arc_async_flush_guid_inuse(uint64_t spa_guid)
return (B_FALSE);
}

/*
 * Collect the guids of spas that currently have an ARC async flush
 * registered on arc_async_flush_list.
 *
 * At most list_len guids are copied into spa_list, in list order; the
 * return value is the number of slots that were filled. The result is
 * consumed by arc_evict_state() to restrict eviction to those spas.
 */
static unsigned int
arc_async_flush_init_spa_list(uint64_t spa_list[], unsigned int list_len)
{
	unsigned int filled = 0;
	arc_async_flush_t *entry;

	mutex_enter(&arc_async_flush_lock);

	/* Walk the list, stopping early once every array slot is used. */
	entry = list_head(&arc_async_flush_list);
	while (filled < list_len && entry != NULL) {
		spa_list[filled] = entry->af_spa_guid;
		filled++;
		entry = list_next(&arc_async_flush_list, entry);
	}

	mutex_exit(&arc_async_flush_lock);

	return (filled);
}

uint64_t
arc_reduce_target_size(uint64_t to_free)
{
Expand Down Expand Up @@ -9914,8 +9967,7 @@ l2arc_device_teardown(void *arg)
(u_longlong_t)elaspsed);
}

if (rva->rva_task_id != TASKQID_INVALID)
arc_async_flush_remove(rva->rva_spa_gid, rva->rva_task_id);
arc_async_flush_remove(rva->rva_spa_gid, rva->rva_task_id);

kmem_free(rva, sizeof (remove_vdev_args_t));
}
Expand Down Expand Up @@ -9990,8 +10042,8 @@ l2arc_remove_vdev(vdev_t *vd)
mutex_exit(&arc_async_flush_lock);

if (tid == TASKQID_INVALID) {
arc_async_flush_remove(spa_guid, TASKQID_INVALID);
l2arc_device_teardown(rva);
arc_async_flush_remove(spa_guid, TASKQID_INVALID);
}
}

Expand Down

0 comments on commit 3d835dd

Please sign in to comment.