diff --git a/cmd/arc_summary/arc_summary2 b/cmd/arc_summary/arc_summary2 index 5dc40d759dce..c4b0ace62bee 100755 --- a/cmd/arc_summary/arc_summary2 +++ b/cmd/arc_summary/arc_summary2 @@ -219,12 +219,30 @@ def get_arc_summary(Kstat): deleted = Kstat["kstat.zfs.misc.arcstats.deleted"] mutex_miss = Kstat["kstat.zfs.misc.arcstats.mutex_miss"] evict_skip = Kstat["kstat.zfs.misc.arcstats.evict_skip"] + evict_l2_cached = Kstat["kstat.zfs.misc.arcstats.evict_l2_cached"] + evict_l2_eligible = Kstat["kstat.zfs.misc.arcstats.evict_l2_eligible"] + evict_l2_eligible_mfu = Kstat["kstat.zfs.misc.arcstats.evict_l2_eligible_mfu"] + evict_l2_eligible_mru = Kstat["kstat.zfs.misc.arcstats.evict_l2_eligible_mru"] + evict_l2_ineligible = Kstat["kstat.zfs.misc.arcstats.evict_l2_ineligible"] + evict_l2_skip = Kstat["kstat.zfs.misc.arcstats.evict_l2_skip"] # ARC Misc. output["arc_misc"] = {} output["arc_misc"]["deleted"] = fHits(deleted) - output["arc_misc"]['mutex_miss'] = fHits(mutex_miss) - output["arc_misc"]['evict_skips'] = fHits(evict_skip) + output["arc_misc"]["mutex_miss"] = fHits(mutex_miss) + output["arc_misc"]["evict_skips"] = fHits(evict_skip) + output["arc_misc"]["evict_l2_skip"] = fHits(evict_l2_skip) + output["arc_misc"]["evict_l2_cached"] = fBytes(evict_l2_cached) + output["arc_misc"]["evict_l2_eligible"] = fBytes(evict_l2_eligible) + output["arc_misc"]["evict_l2_eligible_mfu"] = { + 'per': fPerc(evict_l2_eligible_mfu, evict_l2_eligible), + 'num': fBytes(evict_l2_eligible_mfu), + } + output["arc_misc"]["evict_l2_eligible_mru"] = { + 'per': fPerc(evict_l2_eligible_mru, evict_l2_eligible), + 'num': fBytes(evict_l2_eligible_mru), + } + output["arc_misc"]["evict_l2_ineligible"] = fBytes(evict_l2_ineligible) # ARC Sizing arc_size = Kstat["kstat.zfs.misc.arcstats.size"] @@ -340,8 +358,26 @@ def _arc_summary(Kstat): sys.stdout.write("\tDeleted:\t\t\t\t%s\n" % arc['arc_misc']['deleted']) sys.stdout.write("\tMutex Misses:\t\t\t\t%s\n" % arc['arc_misc']['mutex_miss']) - 
sys.stdout.write("\tEvict Skips:\t\t\t\t%s\n" % + sys.stdout.write("\tEviction Skips:\t\t\t\t%s\n" % arc['arc_misc']['evict_skips']) + sys.stdout.write("\tEviction Skips Due to L2 Writes:\t%s\n" % + arc['arc_misc']['evict_l2_skip']) + sys.stdout.write("\tL2 Cached Evictions:\t\t\t%s\n" % + arc['arc_misc']['evict_l2_cached']) + sys.stdout.write("\tL2 Eligible Evictions:\t\t\t%s\n" % + arc['arc_misc']['evict_l2_eligible']) + sys.stdout.write("\tL2 Eligible MFU Evictions:\t%s\t%s\n" % ( + arc['arc_misc']['evict_l2_eligible_mfu']['per'], + arc['arc_misc']['evict_l2_eligible_mfu']['num'], + ) + ) + sys.stdout.write("\tL2 Eligible MRU Evictions:\t%s\t%s\n" % ( + arc['arc_misc']['evict_l2_eligible_mru']['per'], + arc['arc_misc']['evict_l2_eligible_mru']['num'], + ) + ) + sys.stdout.write("\tL2 Ineligible Evictions:\t\t%s\n" % + arc['arc_misc']['evict_l2_ineligible']) sys.stdout.write("\n") # ARC Sizing @@ -677,6 +713,11 @@ def get_l2arc_summary(Kstat): l2_writes_done = Kstat["kstat.zfs.misc.arcstats.l2_writes_done"] l2_writes_error = Kstat["kstat.zfs.misc.arcstats.l2_writes_error"] l2_writes_sent = Kstat["kstat.zfs.misc.arcstats.l2_writes_sent"] + l2_mfu_asize = Kstat["kstat.zfs.misc.arcstats.l2_mfu_asize"] + l2_mru_asize = Kstat["kstat.zfs.misc.arcstats.l2_mru_asize"] + l2_prefetch_asize = Kstat["kstat.zfs.misc.arcstats.l2_prefetch_asize"] + l2_bufc_data_asize = Kstat["kstat.zfs.misc.arcstats.l2_bufc_data_asize"] + l2_bufc_metadata_asize = Kstat["kstat.zfs.misc.arcstats.l2_bufc_metadata_asize"] l2_access_total = (l2_hits + l2_misses) output['l2_health_count'] = (l2_writes_error + l2_cksum_bad + l2_io_error) @@ -699,7 +740,7 @@ def get_l2arc_summary(Kstat): output["io_errors"] = fHits(l2_io_error) output["l2_arc_size"] = {} - output["l2_arc_size"]["adative"] = fBytes(l2_size) + output["l2_arc_size"]["adaptive"] = fBytes(l2_size) output["l2_arc_size"]["actual"] = { 'per': fPerc(l2_asize, l2_size), 'num': fBytes(l2_asize) @@ -708,6 +749,26 @@ def get_l2arc_summary(Kstat): 
'per': fPerc(l2_hdr_size, l2_size), 'num': fBytes(l2_hdr_size), } + output["l2_arc_size"]["mfu_asize"] = { + 'per': fPerc(l2_mfu_asize, l2_asize), + 'num': fBytes(l2_mfu_asize), + } + output["l2_arc_size"]["mru_asize"] = { + 'per': fPerc(l2_mru_asize, l2_asize), + 'num': fBytes(l2_mru_asize), + } + output["l2_arc_size"]["prefetch_asize"] = { + 'per': fPerc(l2_prefetch_asize, l2_asize), + 'num': fBytes(l2_prefetch_asize), + } + output["l2_arc_size"]["bufc_data_asize"] = { + 'per': fPerc(l2_bufc_data_asize, l2_asize), + 'num': fBytes(l2_bufc_data_asize), + } + output["l2_arc_size"]["bufc_metadata_asize"] = { + 'per': fPerc(l2_bufc_metadata_asize, l2_asize), + 'num': fBytes(l2_bufc_metadata_asize), + } output["l2_arc_evicts"] = {} output["l2_arc_evicts"]['lock_retries'] = fHits(l2_evict_lock_retry) @@ -772,7 +833,7 @@ def _l2arc_summary(Kstat): sys.stdout.write("\n") sys.stdout.write("L2 ARC Size: (Adaptive)\t\t\t\t%s\n" % - arc["l2_arc_size"]["adative"]) + arc["l2_arc_size"]["adaptive"]) sys.stdout.write("\tCompressed:\t\t\t%s\t%s\n" % ( arc["l2_arc_size"]["actual"]["per"], arc["l2_arc_size"]["actual"]["num"], @@ -783,11 +844,36 @@ def _l2arc_summary(Kstat): arc["l2_arc_size"]["head_size"]["num"], ) ) + sys.stdout.write("\tMFU Comp. Size:\t\t\t%s\t%s\n" % ( + arc["l2_arc_size"]["mfu_asize"]["per"], + arc["l2_arc_size"]["mfu_asize"]["num"], + ) + ) + sys.stdout.write("\tMRU Comp. Size:\t\t\t%s\t%s\n" % ( + arc["l2_arc_size"]["mru_asize"]["per"], + arc["l2_arc_size"]["mru_asize"]["num"], + ) + ) + sys.stdout.write("\tPrefetch Comp. Size:\t\t%s\t%s\n" % ( + arc["l2_arc_size"]["prefetch_asize"]["per"], + arc["l2_arc_size"]["prefetch_asize"]["num"], + ) + ) + sys.stdout.write("\tData (buf content) Comp. 
Size:\t%s\t%s\n" % ( + arc["l2_arc_size"]["bufc_data_asize"]["per"], + arc["l2_arc_size"]["bufc_data_asize"]["num"], + ) + ) + sys.stdout.write("\tMetadata (buf content) Size:\t%s\t%s\n" % ( + arc["l2_arc_size"]["bufc_metadata_asize"]["per"], + arc["l2_arc_size"]["bufc_metadata_asize"]["num"], + ) + ) sys.stdout.write("\n") if arc["l2_arc_evicts"]['lock_retries'] != '0' or \ arc["l2_arc_evicts"]["reading"] != '0': - sys.stdout.write("L2 ARC Evicts:\n") + sys.stdout.write("L2 ARC Evictions:\n") sys.stdout.write("\tLock Retries:\t\t\t\t%s\n" % arc["l2_arc_evicts"]['lock_retries']) sys.stdout.write("\tUpon Reading:\t\t\t\t%s\n" % diff --git a/cmd/arc_summary/arc_summary3 b/cmd/arc_summary/arc_summary3 index c920b8e5395d..8a1e14802b10 100755 --- a/cmd/arc_summary/arc_summary3 +++ b/cmd/arc_summary/arc_summary3 @@ -598,6 +598,20 @@ def section_arc(kstats_dict): prt_i1('Deleted:', f_hits(arc_stats['deleted'])) prt_i1('Mutex misses:', f_hits(arc_stats['mutex_miss'])) prt_i1('Eviction skips:', f_hits(arc_stats['evict_skip'])) + prt_i1('Eviction skips due to L2 writes:', + f_hits(arc_stats['evict_l2_skip'])) + prt_i1('L2 cached evictions:', f_bytes(arc_stats['evict_l2_cached'])) + prt_i1('L2 eligible evictions:', f_bytes(arc_stats['evict_l2_eligible'])) + prt_i2('L2 eligible MFU evictions:', + f_perc(arc_stats['evict_l2_eligible_mfu'], + arc_stats['evict_l2_eligible']), + f_bytes(arc_stats['evict_l2_eligible_mfu'])) + prt_i2('L2 eligible MRU evictions:', + f_perc(arc_stats['evict_l2_eligible_mru'], + arc_stats['evict_l2_eligible']), + f_bytes(arc_stats['evict_l2_eligible_mru'])) + prt_i1('L2 ineligible evictions:', + f_bytes(arc_stats['evict_l2_ineligible'])) print() @@ -736,6 +750,21 @@ def section_l2arc(kstats_dict): prt_i2('Header size:', f_perc(arc_stats['l2_hdr_size'], arc_stats['l2_size']), f_bytes(arc_stats['l2_hdr_size'])) + prt_i2('MFU aligned size:', + f_perc(arc_stats['l2_mfu_asize'], arc_stats['l2_asize']), + f_bytes(arc_stats['l2_mfu_asize'])) + prt_i2('MRU 
aligned size:', + f_perc(arc_stats['l2_mru_asize'], arc_stats['l2_asize']), + f_bytes(arc_stats['l2_mru_asize'])) + prt_i2('Prefetch aligned size:', + f_perc(arc_stats['l2_prefetch_asize'], arc_stats['l2_asize']), + f_bytes(arc_stats['l2_prefetch_asize'])) + prt_i2('Data (buffer content) aligned size:', + f_perc(arc_stats['l2_bufc_data_asize'], arc_stats['l2_asize']), + f_bytes(arc_stats['l2_bufc_data_asize'])) + prt_i2('Metadata (buffer content) aligned size:', + f_perc(arc_stats['l2_bufc_metadata_asize'], arc_stats['l2_asize']), + f_bytes(arc_stats['l2_bufc_metadata_asize'])) print() prt_1('L2ARC breakdown:', f_hits(l2_access_total)) diff --git a/cmd/arcstat/arcstat.in b/cmd/arcstat/arcstat.in index c83a1c74599e..9b0f834a0489 100755 --- a/cmd/arcstat/arcstat.in +++ b/cmd/arcstat/arcstat.in @@ -88,6 +88,12 @@ cols = { "mfug": [4, 1000, "MFU ghost list hits per second"], "mrug": [4, 1000, "MRU ghost list hits per second"], "eskip": [5, 1000, "evict_skip per second"], + "el2skip": [7, 1000, "evict skip, due to l2 writes, per second"], + "el2cach": [7, 1024, "Size of L2 cached evictions per second"], + "el2el": [5, 1024, "Size of L2 eligible evictions per second"], + "el2mfu": [6, 1024, "Size of L2 eligible MFU evictions per second"], + "el2mru": [6, 1024, "Size of L2 eligible MRU evictions per second"], + "el2inel": [7, 1024, "Size of L2 ineligible evictions per second"], "mtxmis": [6, 1000, "mutex_miss per second"], "dread": [5, 1000, "Demand accesses per second"], "pread": [5, 1000, "Prefetch accesses per second"], @@ -96,6 +102,16 @@ cols = { "l2read": [6, 1000, "Total L2ARC accesses per second"], "l2hit%": [6, 100, "L2ARC access hit percentage"], "l2miss%": [7, 100, "L2ARC access miss percentage"], + "l2pref": [6, 1024, "L2ARC prefetch aligned size"], + "l2mfu": [5, 1024, "L2ARC MFU aligned size"], + "l2mru": [5, 1024, "L2ARC MRU aligned size"], + "l2data": [6, 1024, "L2ARC data aligned size"], + "l2meta": [6, 1024, "L2ARC metadata aligned size"], + "l2pref%": 
[7, 100, "L2ARC prefetch percentage"], + "l2mfu%": [6, 100, "L2ARC MFU percentage"], + "l2mru%": [6, 100, "L2ARC MRU percentage"], + "l2data%": [7, 100, "L2ARC data percentage"], + "l2meta%": [7, 100, "L2ARC metadata percentage"], "l2asize": [7, 1024, "Actual (compressed) size of the L2ARC"], "l2size": [6, 1024, "Size of the L2ARC"], "l2bytes": [7, 1024, "Bytes read per second from the L2ARC"], @@ -436,6 +452,12 @@ def calculate(): v["mrug"] = d["mru_ghost_hits"] / sint v["mfug"] = d["mfu_ghost_hits"] / sint v["eskip"] = d["evict_skip"] / sint + v["el2skip"] = d["evict_l2_skip"] / sint + v["el2cach"] = d["evict_l2_cached"] / sint + v["el2el"] = d["evict_l2_eligible"] / sint + v["el2mfu"] = d["evict_l2_eligible_mfu"] / sint + v["el2mru"] = d["evict_l2_eligible_mru"] / sint + v["el2inel"] = d["evict_l2_ineligible"] / sint v["mtxmis"] = d["mutex_miss"] / sint if l2exist: @@ -449,6 +471,17 @@ def calculate(): v["l2size"] = cur["l2_size"] v["l2bytes"] = d["l2_read_bytes"] / sint + v["l2pref"] = cur["l2_prefetch_asize"] + v["l2mfu"] = cur["l2_mfu_asize"] + v["l2mru"] = cur["l2_mru_asize"] + v["l2data"] = cur["l2_bufc_data_asize"] + v["l2meta"] = cur["l2_bufc_metadata_asize"] + v["l2pref%"] = 100 * v["l2pref"] / v["l2asize"] + v["l2mfu%"] = 100 * v["l2mfu"] / v["l2asize"] + v["l2mru%"] = 100 * v["l2mru"] / v["l2asize"] + v["l2data%"] = 100 * v["l2data"] / v["l2asize"] + v["l2meta%"] = 100 * v["l2meta"] / v["l2asize"] + v["grow"] = 0 if cur["arc_no_grow"] else 1 v["need"] = cur["arc_need_free"] v["free"] = cur["memory_free_bytes"] diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index e7211711a41c..7485d22c3962 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -4188,6 +4188,8 @@ dump_l2arc_log_entries(uint64_t log_entries, (u_longlong_t)L2BLK_GET_PREFETCH((&le[j])->le_prop)); (void) printf("|\t\t\t\taddress: %llu\n", (u_longlong_t)le[j].le_daddr); + (void) printf("|\t\t\t\tstate: %llu\n", + (u_longlong_t)L2BLK_GET_STATE((&le[j])->le_prop)); (void) printf("|\n"); } (void) 
printf("\n"); diff --git a/configure.ac b/configure.ac index 199187ce51bb..38f2f12afd5b 100644 --- a/configure.ac +++ b/configure.ac @@ -338,6 +338,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/inheritance/Makefile tests/zfs-tests/tests/functional/inuse/Makefile tests/zfs-tests/tests/functional/io/Makefile + tests/zfs-tests/tests/functional/l2arc_arcstats/Makefile tests/zfs-tests/tests/functional/large_files/Makefile tests/zfs-tests/tests/functional/largest_pool/Makefile tests/zfs-tests/tests/functional/libzfs/Makefile diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index c5061695d944..5555ef24c5da 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -350,6 +350,8 @@ typedef struct l2arc_lb_ptr_buf { #define L2BLK_SET_TYPE(field, x) BF64_SET((field), 48, 8, x) #define L2BLK_GET_PROTECTED(field) BF64_GET((field), 56, 1) #define L2BLK_SET_PROTECTED(field, x) BF64_SET((field), 56, 1, x) +#define L2BLK_GET_STATE(field) BF64_GET((field), 57, 4) +#define L2BLK_SET_STATE(field, x) BF64_SET((field), 57, 4, x) #define PTR_SWAP(x, y) \ do { \ @@ -446,6 +448,7 @@ typedef struct l2arc_buf_hdr { uint64_t b_daddr; /* disk address, offset byte */ uint32_t b_hits; list_node_t b_l2node; + arc_state_type_t b_arcs_state; } l2arc_buf_hdr_t; typedef struct l2arc_write_callback { @@ -546,6 +549,8 @@ typedef struct arc_stats { kstat_named_t arcstat_evict_not_enough; kstat_named_t arcstat_evict_l2_cached; kstat_named_t arcstat_evict_l2_eligible; + kstat_named_t arcstat_evict_l2_eligible_mfu; + kstat_named_t arcstat_evict_l2_eligible_mru; kstat_named_t arcstat_evict_l2_ineligible; kstat_named_t arcstat_evict_l2_skip; kstat_named_t arcstat_hash_elements; @@ -744,6 +749,18 @@ typedef struct arc_stats { kstat_named_t arcstat_mfu_ghost_evictable_metadata; kstat_named_t arcstat_l2_hits; kstat_named_t arcstat_l2_misses; + /* + * Allocated size (in bytes) of L2ARC cached buffers by ARC state. 
+ */ + kstat_named_t arcstat_l2_prefetch_asize; + kstat_named_t arcstat_l2_mru_asize; + kstat_named_t arcstat_l2_mfu_asize; + /* + * Allocated size (in bytes) of L2ARC cached buffers by buffer content + * type. + */ + kstat_named_t arcstat_l2_bufc_data_asize; + kstat_named_t arcstat_l2_bufc_metadata_asize; kstat_named_t arcstat_l2_feeds; kstat_named_t arcstat_l2_rw_clash; kstat_named_t arcstat_l2_read_bytes; diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 904c325f37a1..50ee8ae82932 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -491,6 +491,8 @@ arc_stats_t arc_stats = { { "evict_not_enough", KSTAT_DATA_UINT64 }, { "evict_l2_cached", KSTAT_DATA_UINT64 }, { "evict_l2_eligible", KSTAT_DATA_UINT64 }, + { "evict_l2_eligible_mfu", KSTAT_DATA_UINT64 }, + { "evict_l2_eligible_mru", KSTAT_DATA_UINT64 }, { "evict_l2_ineligible", KSTAT_DATA_UINT64 }, { "evict_l2_skip", KSTAT_DATA_UINT64 }, { "hash_elements", KSTAT_DATA_UINT64 }, @@ -532,6 +534,11 @@ arc_stats_t arc_stats = { { "mfu_ghost_evictable_metadata", KSTAT_DATA_UINT64 }, { "l2_hits", KSTAT_DATA_UINT64 }, { "l2_misses", KSTAT_DATA_UINT64 }, + { "l2_prefetch_asize", KSTAT_DATA_UINT64 }, + { "l2_mru_asize", KSTAT_DATA_UINT64 }, + { "l2_mfu_asize", KSTAT_DATA_UINT64 }, + { "l2_bufc_data_asize", KSTAT_DATA_UINT64 }, + { "l2_bufc_metadata_asize", KSTAT_DATA_UINT64 }, { "l2_feeds", KSTAT_DATA_UINT64 }, { "l2_rw_clash", KSTAT_DATA_UINT64 }, { "l2_read_bytes", KSTAT_DATA_UINT64 }, @@ -893,6 +900,8 @@ static inline void arc_hdr_clear_flags(arc_buf_hdr_t *hdr, arc_flags_t flags); static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *); static void l2arc_read_done(zio_t *); static void l2arc_do_free_on_write(void); +static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr, + boolean_t state_only); /* * L2ARC TRIM @@ -1720,7 +1729,7 @@ static arc_buf_hdr_t * arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev, dva_t dva, uint64_t daddr, int32_t psize, uint64_t birth, 
enum zio_compress compress, uint8_t complevel, boolean_t protected, - boolean_t prefetch) + boolean_t prefetch, arc_state_type_t arcs_state) { arc_buf_hdr_t *hdr; @@ -1744,6 +1753,7 @@ arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev, hdr->b_l2hdr.b_dev = dev; hdr->b_l2hdr.b_daddr = daddr; + hdr->b_l2hdr.b_arcs_state = arcs_state; return (hdr); } @@ -2305,7 +2315,11 @@ add_reference(arc_buf_hdr_t *hdr, void *tag) arc_evictable_space_decrement(hdr, state); } /* remove the prefetch flag if we get a reference */ + if (HDR_HAS_L2HDR(hdr)) + l2arc_hdr_arcstats_update(hdr, B_FALSE, B_TRUE); arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH); + if (HDR_HAS_L2HDR(hdr)) + l2arc_hdr_arcstats_update(hdr, B_TRUE, B_TRUE); } } @@ -2588,9 +2602,16 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, } } - if (HDR_HAS_L1HDR(hdr)) + if (HDR_HAS_L1HDR(hdr)) { hdr->b_l1hdr.b_state = new_state; + if (HDR_HAS_L2HDR(hdr) && new_state != arc_l2c_only) { + l2arc_hdr_arcstats_update(hdr, B_FALSE, B_TRUE); + hdr->b_l2hdr.b_arcs_state = new_state->arcs_state; + l2arc_hdr_arcstats_update(hdr, B_TRUE, B_TRUE); + } + } + /* * L2 headers should never be on the L2 state list since they don't * have L1 headers allocated. @@ -3677,6 +3698,76 @@ arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder, return (buf); } +static void +l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr, + boolean_t state_only) +{ + l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr; + l2arc_dev_t *dev = l2hdr->b_dev; + uint64_t lsize = HDR_GET_LSIZE(hdr); + uint64_t psize = HDR_GET_PSIZE(hdr); + uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, psize); + arc_buf_contents_t type = hdr->b_type; + int64_t lsize_s; + int64_t psize_s; + int64_t asize_s; + + if (!incr) { + lsize_s = -lsize; + psize_s = -psize; + asize_s = -asize; + } else { + lsize_s = lsize; + psize_s = psize; + asize_s = asize; + } + + /* If the buffer is a prefetch, count it as such. 
*/ + if (HDR_PREFETCH(hdr)) { + ARCSTAT_INCR(arcstat_l2_prefetch_asize, asize_s); + } else { + /* + * We use the value stored in the L2 header upon initial + * caching in L2ARC. This value will be updated in case + * an MRU/MRU_ghost buffer transitions to MFU but the L2ARC + * metadata (log entry) cannot currently be updated. Having + * the ARC state in the L2 header solves the problem of a + * possibly absent L1 header (apparent in buffers restored + * from persistent L2ARC). + */ + switch (hdr->b_l2hdr.b_arcs_state) { + case ARC_STATE_MRU_GHOST: + case ARC_STATE_MRU: + ARCSTAT_INCR(arcstat_l2_mru_asize, asize_s); + break; + case ARC_STATE_MFU_GHOST: + case ARC_STATE_MFU: + ARCSTAT_INCR(arcstat_l2_mfu_asize, asize_s); + break; + default: + break; + } + } + + if (state_only) + return; + + ARCSTAT_INCR(arcstat_l2_psize, psize_s); + ARCSTAT_INCR(arcstat_l2_lsize, lsize_s); + + switch (type) { + case ARC_BUFC_DATA: + ARCSTAT_INCR(arcstat_l2_bufc_data_asize, asize_s); + break; + case ARC_BUFC_METADATA: + ARCSTAT_INCR(arcstat_l2_bufc_metadata_asize, asize_s); + break; + default: + break; + } +} + + static void arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr) { @@ -3690,9 +3781,7 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr) list_remove(&dev->l2ad_buflist, hdr); - ARCSTAT_INCR(arcstat_l2_psize, -psize); - ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr)); - + l2arc_hdr_arcstats_update(hdr, B_FALSE, B_FALSE); vdev_space_update(dev->l2ad_vdev, -asize, 0, 0); (void) zfs_refcount_remove_many(&dev->l2ad_alloc, arc_hdr_size(hdr), @@ -3896,6 +3985,21 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) if (l2arc_write_eligible(hdr->b_spa, hdr)) { ARCSTAT_INCR(arcstat_evict_l2_eligible, HDR_GET_LSIZE(hdr)); + + switch (state->arcs_state) { + case ARC_STATE_MRU: + ARCSTAT_INCR( + arcstat_evict_l2_eligible_mru, + HDR_GET_LSIZE(hdr)); + break; + case ARC_STATE_MFU: + ARCSTAT_INCR( + arcstat_evict_l2_eligible_mfu, + HDR_GET_LSIZE(hdr)); + break; + default: + break; + } } else { 
ARCSTAT_INCR(arcstat_evict_l2_ineligible, HDR_GET_LSIZE(hdr)); @@ -5329,11 +5433,17 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) ASSERT(multilist_link_active( &hdr->b_l1hdr.b_arc_node)); } else { + if (HDR_HAS_L2HDR(hdr)) + l2arc_hdr_arcstats_update(hdr, B_FALSE, + B_TRUE); arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH | ARC_FLAG_PRESCIENT_PREFETCH); atomic_inc_32(&hdr->b_l1hdr.b_mru_hits); ARCSTAT_BUMP(arcstat_mru_hits); + if (HDR_HAS_L2HDR(hdr)) + l2arc_hdr_arcstats_update(hdr, B_TRUE, + B_TRUE); } hdr->b_l1hdr.b_arc_access = now; return; @@ -5364,13 +5474,18 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) * was evicted from the cache. Move it to the * MFU state. */ - if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) { new_state = arc_mru; if (zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) > 0) { + if (HDR_HAS_L2HDR(hdr)) + l2arc_hdr_arcstats_update(hdr, B_FALSE, + B_TRUE); arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH | ARC_FLAG_PRESCIENT_PREFETCH); + if (HDR_HAS_L2HDR(hdr)) + l2arc_hdr_arcstats_update(hdr, B_TRUE, + B_TRUE); } DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr); } else { @@ -5598,8 +5713,6 @@ arc_read_done(zio_t *zio) } arc_hdr_clear_flags(hdr, ARC_FLAG_L2_EVICTED); - if (l2arc_noprefetch && HDR_PREFETCH(hdr)) - arc_hdr_clear_flags(hdr, ARC_FLAG_L2CACHE); callback_list = hdr->b_l1hdr.b_acb; ASSERT3P(callback_list, !=, NULL); @@ -5945,8 +6058,14 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) || rc != EACCES); } else if (*arc_flags & ARC_FLAG_PREFETCH && - zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) { + zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) { + if (HDR_HAS_L2HDR(hdr)) + l2arc_hdr_arcstats_update(hdr, B_FALSE, + B_TRUE); arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH); + if (HDR_HAS_L2HDR(hdr)) + l2arc_hdr_arcstats_update(hdr, B_TRUE, + B_TRUE); } DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr); arc_access(hdr, hash_lock); @@ -6090,8 +6209,13 @@ arc_read(zio_t *pio, spa_t *spa, 
const blkptr_t *bp, } if (*arc_flags & ARC_FLAG_PREFETCH && - zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) + zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) { + if (HDR_HAS_L2HDR(hdr)) + l2arc_hdr_arcstats_update(hdr, B_FALSE, B_TRUE); arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH); + if (HDR_HAS_L2HDR(hdr)) + l2arc_hdr_arcstats_update(hdr, B_TRUE, B_TRUE); + } if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH) arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH); if (*arc_flags & ARC_FLAG_L2CACHE) @@ -7857,9 +7981,11 @@ l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr) * 2. is already cached on the L2ARC. * 3. has an I/O in progress (it may be an incomplete read). * 4. is flagged not eligible (zfs property). + * 5. is a prefetch and l2arc_noprefetch is set. */ if (hdr->b_spa != spa_guid || HDR_HAS_L2HDR(hdr) || - HDR_IO_IN_PROGRESS(hdr) || !HDR_L2CACHE(hdr)) + HDR_IO_IN_PROGRESS(hdr) || !HDR_L2CACHE(hdr) || + (l2arc_noprefetch && HDR_PREFETCH(hdr))) return (B_FALSE); return (B_TRUE); @@ -8050,9 +8176,6 @@ l2arc_write_done(zio_t *zio) DTRACE_PROBE2(l2arc__iodone, zio_t *, zio, l2arc_write_callback_t *, cb); - if (zio->io_error != 0) - ARCSTAT_BUMP(arcstat_l2_writes_error); - /* * All writes completed, or an error was hit. */ @@ -8116,8 +8239,7 @@ l2arc_write_done(zio_t *zio) arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR); uint64_t psize = HDR_GET_PSIZE(hdr); - ARCSTAT_INCR(arcstat_l2_psize, -psize); - ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr)); + l2arc_hdr_arcstats_update(hdr, B_FALSE, B_FALSE); bytes_dropped += vdev_psize_to_asize(dev->l2ad_vdev, psize); @@ -8165,6 +8287,8 @@ l2arc_write_done(zio_t *zio) list_destroy(&cb->l2wcb_abd_list); if (zio->io_error != 0) { + ARCSTAT_BUMP(arcstat_l2_writes_error); + /* * Restore the lbps array in the header to its previous state. 
* If the list of log block pointers is empty, zero out the @@ -9062,6 +9186,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) hdr->b_l2hdr.b_hits = 0; hdr->b_l2hdr.b_daddr = dev->l2ad_hand; + hdr->b_l2hdr.b_arcs_state = + hdr->b_l1hdr.b_state->arcs_state; arc_hdr_set_flags(hdr, ARC_FLAG_HAS_L2HDR); mutex_enter(&dev->l2ad_mtx); @@ -9084,6 +9210,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) write_psize += psize; write_asize += asize; dev->l2ad_hand += asize; + l2arc_hdr_arcstats_update(hdr, B_TRUE, B_FALSE); vdev_space_update(dev->l2ad_vdev, asize, 0, 0); mutex_exit(hash_lock); @@ -9126,8 +9253,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) ASSERT3U(write_asize, <=, target_sz); ARCSTAT_BUMP(arcstat_l2_writes_sent); ARCSTAT_INCR(arcstat_l2_write_bytes, write_psize); - ARCSTAT_INCR(arcstat_l2_lsize, write_lsize); - ARCSTAT_INCR(arcstat_l2_psize, write_psize); dev->l2ad_writing = B_TRUE; (void) zio_wait(pio); @@ -10079,19 +10204,18 @@ l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev) L2BLK_GET_PSIZE((le)->le_prop), le->le_birth, L2BLK_GET_COMPRESS((le)->le_prop), le->le_complevel, L2BLK_GET_PROTECTED((le)->le_prop), - L2BLK_GET_PREFETCH((le)->le_prop)); + L2BLK_GET_PREFETCH((le)->le_prop), + L2BLK_GET_STATE((le)->le_prop)); asize = vdev_psize_to_asize(dev->l2ad_vdev, L2BLK_GET_PSIZE((le)->le_prop)); /* * vdev_space_update() has to be called before arc_hdr_destroy() to - * avoid underflow since the latter also calls the former. + * avoid underflow since the latter also calls vdev_space_update(). 
*/ + l2arc_hdr_arcstats_update(hdr, B_TRUE, B_FALSE); vdev_space_update(dev->l2ad_vdev, asize, 0, 0); - ARCSTAT_INCR(arcstat_l2_lsize, HDR_GET_LSIZE(hdr)); - ARCSTAT_INCR(arcstat_l2_psize, HDR_GET_PSIZE(hdr)); - mutex_enter(&dev->l2ad_mtx); list_insert_tail(&dev->l2ad_buflist, hdr); (void) zfs_refcount_add_many(&dev->l2ad_alloc, arc_hdr_size(hdr), hdr); @@ -10111,14 +10235,15 @@ l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev) arc_hdr_set_flags(exists, ARC_FLAG_HAS_L2HDR); exists->b_l2hdr.b_dev = dev; exists->b_l2hdr.b_daddr = le->le_daddr; + exists->b_l2hdr.b_arcs_state = + L2BLK_GET_STATE((le)->le_prop); mutex_enter(&dev->l2ad_mtx); list_insert_tail(&dev->l2ad_buflist, exists); (void) zfs_refcount_add_many(&dev->l2ad_alloc, arc_hdr_size(exists), exists); mutex_exit(&dev->l2ad_mtx); + l2arc_hdr_arcstats_update(exists, B_TRUE, B_FALSE); vdev_space_update(dev->l2ad_vdev, asize, 0, 0); - ARCSTAT_INCR(arcstat_l2_lsize, HDR_GET_LSIZE(exists)); - ARCSTAT_INCR(arcstat_l2_psize, HDR_GET_PSIZE(exists)); } ARCSTAT_BUMP(arcstat_l2_rebuild_bufs_precached); } @@ -10414,6 +10539,7 @@ l2arc_log_blk_insert(l2arc_dev_t *dev, const arc_buf_hdr_t *hdr) L2BLK_SET_TYPE((le)->le_prop, hdr->b_type); L2BLK_SET_PROTECTED((le)->le_prop, !!(HDR_PROTECTED(hdr))); L2BLK_SET_PREFETCH((le)->le_prop, !!(HDR_PREFETCH(hdr))); + L2BLK_SET_STATE((le)->le_prop, hdr->b_l1hdr.b_state->arcs_state); dev->l2ad_log_blk_payload_asize += vdev_psize_to_asize(dev->l2ad_vdev, HDR_GET_PSIZE(hdr)); diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index fcd9684603b4..cc22fbe5c1eb 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -898,3 +898,7 @@ tests = ['log_spacemap_import_logs'] pre = post = tags = ['functional', 'log_spacemap'] + +[tests/functional/l2arc_arcstats] +tests = ['l2arc_arcstats_pos'] +tags = ['functional', 'l2arc_arcstats'] diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am index 
c56518c55a03..7acd24472f4a 100644 --- a/tests/zfs-tests/tests/functional/Makefile.am +++ b/tests/zfs-tests/tests/functional/Makefile.am @@ -31,6 +31,7 @@ SUBDIRS = \ inheritance \ inuse \ io \ + l2arc_arcstats \ large_files \ largest_pool \ libzfs \ diff --git a/tests/zfs-tests/tests/functional/l2arc_arcstats/Makefile.am b/tests/zfs-tests/tests/functional/l2arc_arcstats/Makefile.am new file mode 100644 index 000000000000..f72698534b01 --- /dev/null +++ b/tests/zfs-tests/tests/functional/l2arc_arcstats/Makefile.am @@ -0,0 +1,8 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/l2arc_arcstats +dist_pkgdata_SCRIPTS = \ + cleanup.ksh \ + setup.ksh \ + l2arc_arcstats_pos.ksh + +dist_pkgdata_DATA = \ + l2arc_arcstats.cfg diff --git a/tests/zfs-tests/tests/functional/l2arc_arcstats/cleanup.ksh b/tests/zfs-tests/tests/functional/l2arc_arcstats/cleanup.ksh new file mode 100755 index 000000000000..87c752d0b285 --- /dev/null +++ b/tests/zfs-tests/tests/functional/l2arc_arcstats/cleanup.ksh @@ -0,0 +1,31 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020, George Amanakis. All rights reserved. +# + +. 
$STF_SUITE/tests/functional/l2arc_arcstats/l2arc_arcstats.cfg + +verify_runnable "global" + +if datasetexists $TESTPOOL ; then + log_must zpool destroy -f $TESTPOOL +fi + +log_must rm -rf $VDIR + +log_pass diff --git a/tests/zfs-tests/tests/functional/l2arc_arcstats/l2arc_arcstats.cfg b/tests/zfs-tests/tests/functional/l2arc_arcstats/l2arc_arcstats.cfg new file mode 100644 index 000000000000..0c2c584a007b --- /dev/null +++ b/tests/zfs-tests/tests/functional/l2arc_arcstats/l2arc_arcstats.cfg @@ -0,0 +1,37 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020, George Amanakis. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +export SIZE=1G +export VDIR=$TESTDIR/disk.l2arc_arcstats +export VDEV="$VDIR/a" +export VDEV_CACHE="$VDIR/b" + +# fio options +export DIRECTORY=/$TESTPOOL +export NUMJOBS=4 +export RUNTIME=10 +export PERF_RANDSEED=1234 +export PERF_COMPPERCENT=66 +export PERF_COMPCHUNK=0 +export BLOCKSIZE=128K +export SYNC_TYPE=0 +export DIRECT=1 diff --git a/tests/zfs-tests/tests/functional/l2arc_arcstats/l2arc_arcstats_pos.ksh b/tests/zfs-tests/tests/functional/l2arc_arcstats/l2arc_arcstats_pos.ksh new file mode 100755 index 000000000000..506df73e9aad --- /dev/null +++ b/tests/zfs-tests/tests/functional/l2arc_arcstats/l2arc_arcstats_pos.ksh @@ -0,0 +1,99 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020, George Amanakis. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/l2arc_arcstats/l2arc_arcstats.cfg + +# +# DESCRIPTION: +# L2ARC MFU/MRU arcstats do not leak +# +# STRATEGY: +# 1. Create pool with a cache device. +# 2. Create a random file in that pool, smaller than the cache device +# and random read for 30 sec. +# 3. Read l2_mfu_asize, l2_mru_asize, l2_prefetch_asize and l2_asize. +# 4. Export pool. +# 5. Verify l2_mfu_asize and l2_mru_asize are 0. +# 6. Import pool. +# 7. Random read for 30 sec. +# 8. Read l2_mfu_asize, l2_mru_asize, l2_prefetch_asize and l2_asize. +# 9. Verify that L2ARC MFU increased and MFU+MRU+prefetch = l2_asize. +# + +verify_runnable "global" + +log_assert "L2ARC MFU/MRU arcstats do not leak." 
+ +function cleanup +{ + if poolexists $TESTPOOL ; then + destroy_pool $TESTPOOL + fi + + log_must set_tunable32 L2ARC_NOPREFETCH $noprefetch +} +log_onexit cleanup + +# L2ARC_NOPREFETCH is set to 0 to let L2ARC handle prefetches +typeset noprefetch=$(get_tunable L2ARC_NOPREFETCH) +log_must set_tunable32 L2ARC_NOPREFETCH 0 + +typeset fill_mb=800 +typeset cache_sz=$(( 1.4 * $fill_mb )) +export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M + +log_must truncate -s ${cache_sz}M $VDEV_CACHE + +typeset log_blk_start=$(get_arcstat l2_log_blk_writes) + +log_must zpool create -f $TESTPOOL $VDEV cache $VDEV_CACHE + +log_must fio $FIO_SCRIPTS/mkfiles.fio +log_must fio $FIO_SCRIPTS/random_reads.fio + +log_must zpool offline $TESTPOOL $VDEV_CACHE +typeset l2_mfu_init=$(get_arcstat l2_mfu_asize) +typeset l2_mru_init=$(get_arcstat l2_mru_asize) +typeset l2_prefetch_init=$(get_arcstat l2_prefetch_asize) +typeset l2_asize_init=$(get_arcstat l2_asize) +log_must zpool online $TESTPOOL $VDEV_CACHE + +log_must zpool export $TESTPOOL +log_must test $(get_arcstat l2_mfu_asize) -eq 0 +log_must test $(get_arcstat l2_mru_asize) -eq 0 +log_must zpool import -d $VDIR $TESTPOOL + +log_must fio $FIO_SCRIPTS/random_reads.fio +log_must zpool offline $TESTPOOL $VDEV_CACHE +typeset l2_mfu_end=$(get_arcstat l2_mfu_asize) +typeset l2_mru_end=$(get_arcstat l2_mru_asize) +typeset l2_prefetch_end=$(get_arcstat l2_prefetch_asize) +typeset l2_asize_end=$(get_arcstat l2_asize) + +log_must test $(( $l2_mfu_end - $l2_mfu_init )) -gt 0 +log_must test $(( $l2_mru_end + $l2_mfu_end + $l2_prefetch_end - \ + $l2_asize_end )) -eq 0 +log_must test $(( $l2_mru_init + $l2_mfu_init + $l2_prefetch_init - \ + $l2_asize_init )) -eq 0 + +log_must zpool destroy -f $TESTPOOL + +log_pass "L2ARC MFU/MRU arcstats do not leak." 
diff --git a/tests/zfs-tests/tests/functional/l2arc_arcstats/setup.ksh b/tests/zfs-tests/tests/functional/l2arc_arcstats/setup.ksh new file mode 100755 index 000000000000..d2252a9ea3e6 --- /dev/null +++ b/tests/zfs-tests/tests/functional/l2arc_arcstats/setup.ksh @@ -0,0 +1,29 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020, George Amanakis. All rights reserved. +# + +. $STF_SUITE/tests/functional/l2arc_arcstats/l2arc_arcstats.cfg + +verify_runnable "global" + +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must mkfile $SIZE $VDEV + +log_pass