diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h
index 43818d4104c4..db6238fda61e 100644
--- a/include/sys/arc_impl.h
+++ b/include/sys/arc_impl.h
@@ -30,6 +30,7 @@
 #define	_SYS_ARC_IMPL_H
 
 #include <sys/arc.h>
+#include <sys/multilist.h>
 #include <sys/zio_crypt.h>
 #include <sys/zthr.h>
 #include <sys/aggsum.h>
diff --git a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c
index bca881d82f87..d2985d572975 100644
--- a/module/zfs/dmu_zfetch.c
+++ b/module/zfs/dmu_zfetch.c
@@ -28,6 +28,7 @@
  */
 
 #include <sys/zfs_context.h>
+#include <sys/arc_impl.h>
 #include <sys/dnode.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_zfetch.h>
@@ -65,13 +66,15 @@ typedef struct zfetch_stats {
 	kstat_named_t zfetchstat_misses;
 	kstat_named_t zfetchstat_max_streams;
 	kstat_named_t zfetchstat_io_issued;
+	kstat_named_t zfetchstat_io_active;
 } zfetch_stats_t;
 
 static zfetch_stats_t zfetch_stats = {
 	{ "hits",			KSTAT_DATA_UINT64 },
 	{ "misses",			KSTAT_DATA_UINT64 },
 	{ "max_streams",		KSTAT_DATA_UINT64 },
-	{ "io_issued",		KSTAT_DATA_UINT64 },
+	{ "io_issued",			KSTAT_DATA_UINT64 },
+	{ "io_active",			KSTAT_DATA_UINT64 },
 };
 
 struct {
@@ -79,6 +82,7 @@ struct {
 	wmsum_t zfetchstat_misses;
 	wmsum_t zfetchstat_max_streams;
 	wmsum_t zfetchstat_io_issued;
+	aggsum_t zfetchstat_io_active;
 } zfetch_sums;
 
 #define	ZFETCHSTAT_BUMP(stat) \
@@ -104,6 +108,8 @@ zfetch_kstats_update(kstat_t *ksp, int rw)
 	    wmsum_value(&zfetch_sums.zfetchstat_max_streams);
 	zs->zfetchstat_io_issued.value.ui64 =
 	    wmsum_value(&zfetch_sums.zfetchstat_io_issued);
+	zs->zfetchstat_io_active.value.ui64 =
+	    aggsum_value(&zfetch_sums.zfetchstat_io_active);
 	return (0);
 }
 
@@ -114,6 +120,7 @@ zfetch_init(void)
 	wmsum_init(&zfetch_sums.zfetchstat_misses, 0);
 	wmsum_init(&zfetch_sums.zfetchstat_max_streams, 0);
 	wmsum_init(&zfetch_sums.zfetchstat_io_issued, 0);
+	aggsum_init(&zfetch_sums.zfetchstat_io_active, 0);
 
 	zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
 	    KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
@@ -138,6 +145,8 @@ zfetch_fini(void)
 	wmsum_fini(&zfetch_sums.zfetchstat_misses);
 	wmsum_fini(&zfetch_sums.zfetchstat_max_streams);
 	wmsum_fini(&zfetch_sums.zfetchstat_io_issued);
+	ASSERT0(aggsum_value(&zfetch_sums.zfetchstat_io_active));
+	aggsum_fini(&zfetch_sums.zfetchstat_io_active);
 }
 
 /*
@@ -294,6 +303,7 @@ dmu_zfetch_done(void *arg, uint64_t level, uint64_t blkid, boolean_t io_issued)
 		zs->zs_more = B_TRUE;
 	if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
 		dmu_zfetch_stream_fini(zs);
+	aggsum_add(&zfetch_sums.zfetchstat_io_active, -1);
 }
 
 /*
@@ -407,20 +417,28 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
 	 * Start prefetch from the demand access size (nblks).  Double the
 	 * distance every access up to zfetch_min_distance.  After that only
 	 * if needed increase the distance by 1/8 up to zfetch_max_distance.
+	 *
+	 * Don't double the distance beyond single block if we have more
+	 * than ~6% of ARC held by active prefetches.  It should help with
+	 * getting out of RAM on some badly mispredicted read patterns.
 	 */
-	unsigned int nbytes = nblks << zf->zf_dnode->dn_datablkshift;
+	unsigned int dbs = zf->zf_dnode->dn_datablkshift;
+	unsigned int nbytes = nblks << dbs;
 	unsigned int pf_nblks;
 	if (fetch_data) {
 		if (unlikely(zs->zs_pf_dist < nbytes))
 			zs->zs_pf_dist = nbytes;
-		else if (zs->zs_pf_dist < zfetch_min_distance)
+		else if (zs->zs_pf_dist < zfetch_min_distance &&
+		    (zs->zs_pf_dist < (1 << dbs) ||
+		    aggsum_compare(&zfetch_sums.zfetchstat_io_active,
+		    arc_c_max >> (4 + dbs)) < 0))
 			zs->zs_pf_dist *= 2;
 		else if (zs->zs_more)
 			zs->zs_pf_dist += zs->zs_pf_dist / 8;
 		zs->zs_more = B_FALSE;
 		if (zs->zs_pf_dist > zfetch_max_distance)
 			zs->zs_pf_dist = zfetch_max_distance;
-		pf_nblks = zs->zs_pf_dist >> zf->zf_dnode->dn_datablkshift;
+		pf_nblks = zs->zs_pf_dist >> dbs;
 	} else {
 		pf_nblks = 0;
 	}
@@ -439,7 +457,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
 		zs->zs_ipf_dist *= 2;
 	if (zs->zs_ipf_dist > zfetch_max_idistance)
 		zs->zs_ipf_dist = zfetch_max_idistance;
-	pf_nblks = zs->zs_ipf_dist >> zf->zf_dnode->dn_datablkshift;
+	pf_nblks = zs->zs_ipf_dist >> dbs;
 	if (zs->zs_ipf_start < zs->zs_pf_end)
 		zs->zs_ipf_start = zs->zs_pf_end;
 	if (zs->zs_ipf_end < zs->zs_pf_end + pf_nblks)
@@ -509,6 +527,7 @@ dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock)
 		dmu_zfetch_stream_fini(zs);
 		return;
 	}
+	aggsum_add(&zfetch_sums.zfetchstat_io_active, issued);
 
 	if (!have_lock)
 		rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
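
Note (illustration, not part of the patch): the "~6%" in the new comment is arc_c_max >> 4, i.e. 1/16th of the maximum ARC size, and the additional >> dbs converts that byte budget into a count of in-flight prefetch blocks. An aggsum_t is used here, while the purely reported statistics stay wmsum_t, presumably because aggsum_compare() allows a cheap threshold check on the hot path. The standalone sketch below works the numbers; the 16 GiB arc_c_max and 128 KiB block size are assumed example values, not anything read from a real system.

    /*
     * Standalone sketch (assumed values): evaluate the
     * active-prefetch cap used in the hunk above.
     */
    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
    	uint64_t arc_c_max = 16ULL << 30;	/* assume 16 GiB ARC cap */
    	unsigned int dbs = 17;			/* 128 KiB blocks: 1 << 17 */

    	/*
    	 * arc_c_max >> (4 + dbs) == (arc_c_max / 16) / block size:
    	 * how many in-flight prefetch blocks add up to 1/16th
    	 * (~6%) of the maximum ARC size.
    	 */
    	uint64_t cap = arc_c_max >> (4 + dbs);

    	printf("io_active cap: %llu blocks (%llu MiB)\n",
    	    (unsigned long long)cap,
    	    (unsigned long long)((cap << dbs) >> 20));
    	return (0);
    }

For these example values the gate closes at 8192 outstanding prefetch blocks, about 1 GiB of a 16 GiB ARC.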
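
A second sketch models the zs_pf_dist growth policy from dmu_zfetch_prepare() in isolation: double per hit until zfetch_min_distance, but only while active prefetches hold less than 1/16 of ARC (or the distance is still under one block), then grow by 1/8 per hit toward zfetch_max_distance while completions keep setting zs_more. next_pf_dist(), io_active, arc_max, and more are names invented for this model, and the 4 MiB / 64 MiB tunables mirror what I believe the module defaults were at the time of this change.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Example tunables, assumed to match the module defaults. */
    static const uint64_t zfetch_min_distance = 4 * 1024 * 1024;
    static const uint64_t zfetch_max_distance = 64 * 1024 * 1024;

    /*
     * Model of the per-access growth of zs_pf_dist after this patch.
     * 'io_active' stands in for the zfetchstat_io_active aggsum and
     * 'arc_max' for arc_c_max; 'more' mirrors zs->zs_more.
     */
    static uint64_t
    next_pf_dist(uint64_t dist, uint64_t nbytes, unsigned int dbs,
        uint64_t io_active, uint64_t arc_max, bool more)
    {
    	if (dist < nbytes) {
    		dist = nbytes;		/* start from the demand size */
    	} else if (dist < zfetch_min_distance &&
    	    (dist < (1ULL << dbs) ||	/* always allow one block */
    	    io_active < (arc_max >> (4 + dbs)))) {	/* <1/16 of ARC */
    		dist *= 2;		/* exponential ramp-up */
    	} else if (more) {
    		dist += dist / 8;	/* slow growth past the minimum */
    	}
    	if (dist > zfetch_max_distance)
    		dist = zfetch_max_distance;
    	return (dist);
    }

    int
    main(void)
    {
    	uint64_t dist = 0, arc_max = 16ULL << 30;	/* assumed 16 GiB */
    	unsigned int dbs = 17;				/* 128 KiB blocks */

    	/* Uncontended stream: doubles to 4 MiB, then grows by 1/8. */
    	for (int i = 0; i < 10; i++) {
    		dist = next_pf_dist(dist, 1ULL << dbs, dbs, 0, arc_max,
    		    true);
    		printf("access %2d: pf_dist = %8llu KiB\n", i,
    		    (unsigned long long)(dist >> 10));
    	}
    	return (0);
    }

With io_active at or above the cap, the same loop never doubles past the first 128 KiB block and only creeps upward via the 1/8 term, which is the throttling behavior the patch comment describes.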