Skip to content

Commit

Permalink
Finally drop long disabled vdev cache.
Browse files Browse the repository at this point in the history
It was a vdev level read cache, designed to aggregate many small
reads by speculatively issuing bigger reads instead and caching
the result.  But since it has almost no idea about what is going
on with exception of ZIO_FLAG_DONT_CACHE flag set by higher layers,
it was found to make more harm than good, for which reason it was
disabled for the past 12 years.  These days we have much better
instruments to enlarge the I/Os, such as speculative and prescient
prefetches, I/O scheduler, I/O aggregation etc.

Besides just the dead code removal this removes one extra mutex
lock/unlock per write inside vdev_cache_write(), not otherwise
disabled and trying to do some work.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes #14953
  • Loading branch information
amotin authored Jun 9, 2023
1 parent 6db4ed5 commit 70ea484
Show file tree
Hide file tree
Showing 20 changed files with 13 additions and 562 deletions.
35 changes: 0 additions & 35 deletions cmd/arc_summary
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ SECTION_HELP = 'print info from one section ('+' '.join(SECTIONS)+')'
SECTION_PATHS = {'arc': 'arcstats',
'dmu': 'dmu_tx',
'l2arc': 'arcstats', # L2ARC stuff lives in arcstats
'vdev': 'vdev_cache_stats',
'zfetch': 'zfetchstats',
'zil': 'zil'}

Expand All @@ -90,8 +89,6 @@ if sys.platform.startswith('freebsd'):
# Requires py36-sysctl on FreeBSD
import sysctl

VDEV_CACHE_SIZE = 'vdev.cache_size'

def is_value(ctl):
return ctl.type != sysctl.CTLTYPE_NODE

Expand Down Expand Up @@ -135,8 +132,6 @@ elif sys.platform.startswith('linux'):
SPL_PATH = '/sys/module/spl/parameters'
TUNABLES_PATH = '/sys/module/zfs/parameters'

VDEV_CACHE_SIZE = 'zfs_vdev_cache_size'

def load_kstats(section):
path = os.path.join(KSTAT_PATH, section)
with open(path) as f:
Expand Down Expand Up @@ -952,35 +947,6 @@ def section_tunables(*_):
print()


def section_vdev(kstats_dict):
"""Collect information on VDEV caches"""

# Currently [Nov 2017] the VDEV cache is disabled, because it is actually
# harmful. When this is the case, we just skip the whole entry. See
# https://github.com/openzfs/zfs/blob/master/module/zfs/vdev_cache.c
# for details
tunables = get_vdev_params()

if tunables[VDEV_CACHE_SIZE] == '0':
print('VDEV cache disabled, skipping section\n')
return

vdev_stats = isolate_section('vdev_cache_stats', kstats_dict)

vdev_cache_total = int(vdev_stats['hits']) +\
int(vdev_stats['misses']) +\
int(vdev_stats['delegations'])

prt_1('VDEV cache summary:', f_hits(vdev_cache_total))
prt_i2('Hit ratio:', f_perc(vdev_stats['hits'], vdev_cache_total),
f_hits(vdev_stats['hits']))
prt_i2('Miss ratio:', f_perc(vdev_stats['misses'], vdev_cache_total),
f_hits(vdev_stats['misses']))
prt_i2('Delegations:', f_perc(vdev_stats['delegations'], vdev_cache_total),
f_hits(vdev_stats['delegations']))
print()


def section_zil(kstats_dict):
"""Collect information on the ZFS Intent Log. Some of the information
taken from https://github.com/openzfs/zfs/blob/master/include/sys/zil.h
Expand Down Expand Up @@ -1008,7 +974,6 @@ section_calls = {'arc': section_arc,
'l2arc': section_l2arc,
'spl': section_spl,
'tunables': section_tunables,
'vdev': section_vdev,
'zil': section_zil}


Expand Down
7 changes: 3 additions & 4 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -8546,9 +8546,9 @@ zdb_read_block(char *thing, spa_t *spa)
*/
zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
ZIO_FLAG_OPTIONAL, NULL, NULL));
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL,
NULL, NULL));
}

error = zio_wait(zio);
Expand Down Expand Up @@ -8642,7 +8642,6 @@ zdb_read_block(char *thing, spa_t *spa)
zio_nowait(zio_vdev_child_io(czio, bp, vd,
offset, pabd, psize, ZIO_TYPE_READ,
ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_DONT_CACHE |
ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
Expand Down
1 change: 0 additions & 1 deletion include/os/linux/kernel/linux/mod_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ enum scope_prefix_types {
zfs_trim,
zfs_txg,
zfs_vdev,
zfs_vdev_cache,
zfs_vdev_file,
zfs_vdev_mirror,
zfs_vnops,
Expand Down
4 changes: 0 additions & 4 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -1174,10 +1174,6 @@ extern void zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep,
zbookmark_phys_t *zb);
extern void name_to_errphys(char *buf, zbookmark_err_phys_t *zep);

/* vdev cache */
extern void vdev_cache_stat_init(void);
extern void vdev_cache_stat_fini(void);

/* vdev mirror */
extern void vdev_mirror_stat_init(void);
extern void vdev_mirror_stat_fini(void);
Expand Down
6 changes: 0 additions & 6 deletions include/sys/vdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,12 +158,6 @@ extern boolean_t vdev_allocatable(vdev_t *vd);
extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio);
extern boolean_t vdev_is_spacemap_addressable(vdev_t *vd);

extern void vdev_cache_init(vdev_t *vd);
extern void vdev_cache_fini(vdev_t *vd);
extern boolean_t vdev_cache_read(zio_t *zio);
extern void vdev_cache_write(zio_t *zio);
extern void vdev_cache_purge(vdev_t *vd);

extern void vdev_queue_init(vdev_t *vd);
extern void vdev_queue_fini(vdev_t *vd);
extern zio_t *vdev_queue_io(zio_t *zio);
Expand Down
20 changes: 0 additions & 20 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ extern "C" {
* Forward declarations that lots of things need.
*/
typedef struct vdev_queue vdev_queue_t;
typedef struct vdev_cache vdev_cache_t;
typedef struct vdev_cache_entry vdev_cache_entry_t;
struct abd;

extern uint_t zfs_vdev_queue_depth_pct;
Expand Down Expand Up @@ -132,23 +130,6 @@ typedef const struct vdev_ops {
/*
* Virtual device properties
*/
struct vdev_cache_entry {
struct abd *ve_abd;
uint64_t ve_offset;
clock_t ve_lastused;
avl_node_t ve_offset_node;
avl_node_t ve_lastused_node;
uint32_t ve_hits;
uint16_t ve_missed_update;
zio_t *ve_fill_io;
};

struct vdev_cache {
avl_tree_t vc_offset_tree;
avl_tree_t vc_lastused_tree;
kmutex_t vc_lock;
};

typedef struct vdev_queue_class {
uint32_t vqc_active;

Expand Down Expand Up @@ -443,7 +424,6 @@ struct vdev {
boolean_t vdev_resilver_deferred; /* resilver deferred */
boolean_t vdev_kobj_flag; /* kobj event record */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
vdev_cache_t vdev_cache; /* physical block cache */
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
zio_t *vdev_probe_zio; /* root of current probe */
vdev_aux_t vdev_label_aux; /* on-disk aux state */
Expand Down
1 change: 0 additions & 1 deletion include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_SPECULATIVE (1ULL << 8)
#define ZIO_FLAG_CONFIG_WRITER (1ULL << 9)
#define ZIO_FLAG_DONT_RETRY (1ULL << 10)
#define ZIO_FLAG_DONT_CACHE (1ULL << 11)
#define ZIO_FLAG_NODATA (1ULL << 12)
#define ZIO_FLAG_INDUCE_DAMAGE (1ULL << 13)
#define ZIO_FLAG_IO_ALLOCATING (1ULL << 14)
Expand Down
1 change: 0 additions & 1 deletion lib/libzpool/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ nodist_libzpool_la_SOURCES = \
module/zfs/uberblock.c \
module/zfs/unique.c \
module/zfs/vdev.c \
module/zfs/vdev_cache.c \
module/zfs/vdev_draid.c \
module/zfs/vdev_draid_rand.c \
module/zfs/vdev_indirect.c \
Expand Down
15 changes: 0 additions & 15 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -2028,21 +2028,6 @@ Max vdev I/O aggregation size.
.It Sy zfs_vdev_aggregation_limit_non_rotating Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq uint
Max vdev I/O aggregation size for non-rotating media.
.
.It Sy zfs_vdev_cache_bshift Ns = Ns Sy 16 Po 64 KiB Pc Pq uint
Shift size to inflate reads to.
.
.It Sy zfs_vdev_cache_max Ns = Ns Sy 16384 Ns B Po 16 KiB Pc Pq uint
Inflate reads smaller than this value to meet the
.Sy zfs_vdev_cache_bshift
size
.Pq default Sy 64 KiB .
.
.It Sy zfs_vdev_cache_size Ns = Ns Sy 0 Pq uint
Total size of the per-disk cache in bytes.
.Pp
Currently this feature is disabled, as it has been found to not be helpful
for performance and in some cases harmful.
.
.It Sy zfs_vdev_mirror_rotating_inc Ns = Ns Sy 0 Pq int
A number by which the balancing algorithm increments the load calculation for
the purpose of selecting the least busy mirror member when an I/O operation
Expand Down
1 change: 0 additions & 1 deletion man/man8/zpool-events.8
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,6 @@ ZIO_FLAG_CANFAIL:0x00000080
ZIO_FLAG_SPECULATIVE:0x00000100
ZIO_FLAG_CONFIG_WRITER:0x00000200
ZIO_FLAG_DONT_RETRY:0x00000400
ZIO_FLAG_DONT_CACHE:0x00000800
ZIO_FLAG_NODATA:0x00001000
ZIO_FLAG_INDUCE_DAMAGE:0x00002000

Expand Down
1 change: 0 additions & 1 deletion module/Kbuild.in
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,6 @@ ZFS_OBJS := \
uberblock.o \
unique.o \
vdev.o \
vdev_cache.o \
vdev_draid.o \
vdev_draid_rand.o \
vdev_indirect.o \
Expand Down
1 change: 0 additions & 1 deletion module/Makefile.bsd
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,6 @@ SRCS+= abd.c \
uberblock.c \
unique.c \
vdev.c \
vdev_cache.c \
vdev_draid.c \
vdev_draid_rand.c \
vdev_indirect.c \
Expand Down
2 changes: 0 additions & 2 deletions module/os/freebsd/zfs/sysctl_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -872,8 +872,6 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip,
"Enable to bypass vdev_validate().");
/* END CSTYLED */

/* vdev_cache.c */

/* vdev_mirror.c */

/* vdev_queue.c */
Expand Down
11 changes: 4 additions & 7 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -6106,8 +6106,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
asize, abd,
ZIO_CHECKSUM_OFF,
l2arc_read_done, cb, priority,
zio_flags | ZIO_FLAG_DONT_CACHE |
ZIO_FLAG_CANFAIL |
zio_flags | ZIO_FLAG_CANFAIL |
ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY, B_FALSE);
acb->acb_zio_head = rzio;
Expand Down Expand Up @@ -10177,8 +10176,7 @@ l2arc_dev_hdr_read(l2arc_dev_t *dev)
err = zio_wait(zio_read_phys(NULL, dev->l2ad_vdev,
VDEV_LABEL_START_SIZE, l2dhdr_asize, abd,
ZIO_CHECKSUM_LABEL, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_SPECULATIVE, B_FALSE));

abd_free(abd);
Expand Down Expand Up @@ -10498,11 +10496,10 @@ l2arc_log_blk_fetch(vdev_t *vd, const l2arc_log_blkptr_t *lbp,
cb = kmem_zalloc(sizeof (l2arc_read_callback_t), KM_SLEEP);
cb->l2rcb_abd = abd_get_from_buf(lb, asize);
pio = zio_root(vd->vdev_spa, l2arc_blk_fetch_done, cb,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY);
ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY);
(void) zio_nowait(zio_read_phys(pio, vd, lbp->lbp_daddr, asize,
cb->l2rcb_abd, ZIO_CHECKSUM_OFF, NULL, NULL,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL |
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY, B_FALSE));

return (pio);
Expand Down
4 changes: 2 additions & 2 deletions module/zfs/dmu_recv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1371,8 +1371,8 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw,
dnode_t *dn;
abd_t *abd = rrd->abd;
zio_cksum_t bp_cksum = bp->blk_cksum;
zio_flag_t flags = ZIO_FLAG_SPECULATIVE |
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL;
zio_flag_t flags = ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL;

if (rwa->raw)
flags |= ZIO_FLAG_RAW;
Expand Down
2 changes: 0 additions & 2 deletions module/zfs/spa_misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2438,7 +2438,6 @@ spa_init(spa_mode_t mode)
zio_init();
dmu_init();
zil_init();
vdev_cache_stat_init();
vdev_mirror_stat_init();
vdev_raidz_math_init();
vdev_file_init();
Expand All @@ -2462,7 +2461,6 @@ spa_fini(void)
spa_evict_all();

vdev_file_fini();
vdev_cache_stat_fini();
vdev_mirror_stat_fini();
vdev_raidz_math_fini();
chksum_fini();
Expand Down
7 changes: 1 addition & 6 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,6 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
offsetof(struct vdev, vdev_dtl_node));
vd->vdev_stat.vs_timestamp = gethrtime();
vdev_queue_init(vd);
vdev_cache_init(vd);

return (vd);
}
Expand Down Expand Up @@ -1096,7 +1095,6 @@ vdev_free(vdev_t *vd)
* Clean up vdev structure.
*/
vdev_queue_fini(vd);
vdev_cache_fini(vd);

if (vd->vdev_path)
spa_strfree(vd->vdev_path);
Expand Down Expand Up @@ -1720,8 +1718,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
vps = kmem_zalloc(sizeof (*vps), KM_SLEEP);

vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
ZIO_FLAG_TRYHARD;
ZIO_FLAG_DONT_AGGREGATE | ZIO_FLAG_TRYHARD;

if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
/*
Expand Down Expand Up @@ -2612,8 +2609,6 @@ vdev_close(vdev_t *vd)

vd->vdev_ops->vdev_op_close(vd);

vdev_cache_purge(vd);

/*
* We record the previous state before we close it, so that if we are
* doing a reopen(), we don't generate FMA ereports if we notice that
Expand Down
Loading

0 comments on commit 70ea484

Please sign in to comment.