From 9907cc1cc8c16fa2c7c03aa33264153ca2bdad6c Mon Sep 17 00:00:00 2001 From: GeLiXin Date: Thu, 11 Aug 2016 11:15:37 +0800 Subject: [PATCH] Add zfs_arc_meta_limit_percent tunable ARC will evict meta buffers that exceed the arc_meta_limit. Pending further investigation into whether meta buffers deserve special protection, this tunable makes arc_meta_limit adjustable for different workloads. Because zfs_arc_meta_limit_percent can be set to any value when zfs.ko is loaded, a range check is added to guarantee a suitable arc_meta_limit. As suggested by Tim Chase, the zfs_arc_dnode_limit default is changed to a percent-style tunable as well. Signed-off-by: GeLiXin Signed-off-by: Brian Behlendorf Closes #4957 --- man/man5/zfs-module-parameters.5 | 43 +++++++++++++++++++++++++++----- module/zfs/arc.c | 40 ++++++++++++++++++++++++----- 2 files changed, 71 insertions(+), 12 deletions(-) mode change 100644 => 100755 man/man5/zfs-module-parameters.5 mode change 100644 => 100755 module/zfs/arc.c diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 old mode 100644 new mode 100755 index b4ad3700f3cf..aa2d06d6a4ab --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -401,15 +401,31 @@ Default value: \fB2\fR. .ad .RS 12n When the number of bytes consumed by dnodes in the ARC exceeds this number of -bytes, try to unpin some of it in response to demand for non-metadata. This -value acts as a floor to the amount of dnode metadata. +bytes, try to unpin some of it in response to demand for non-metadata. This +value acts as a floor to the amount of dnode metadata, and defaults to 0 which +indicates that a percent, given by \fBzfs_arc_dnode_limit_percent\fR, of +the ARC meta buffers may be used for dnodes. See also \fBzfs_arc_meta_prune\fR which serves a similar purpose but is used when the amount of metadata in the ARC exceeds \fBzfs_arc_meta_limit\fR rather than in response to overall demand for non-metadata. 
.sp -Default value: \fB10% of zfs_arc_meta_limit\fR. +Default value: \fB0\fR. +.RE + +.sp +.ne 2 +.na +\fBzfs_arc_dnode_limit_percent\fR (ulong) +.ad +.RS 12n +Percentage of ARC meta buffers that can be consumed by dnodes. +.sp +See also \fBzfs_arc_dnode_limit\fR which serves a similar purpose but has a +higher priority if set to a nonzero value. +.sp +Default value: \fB10\fR. .RE .sp @@ -503,15 +519,30 @@ Default value: \fB0\fR. The maximum allowed size in bytes that meta data buffers are allowed to consume in the ARC. When this limit is reached meta data buffers will be reclaimed even if the overall arc_c_max has not been reached. This -value defaults to 0 which indicates that 3/4 of the ARC may be used -for meta data. +value defaults to 0 which indicates that a percent of the ARC, given by +\fBzfs_arc_meta_limit_percent\fR, may be used for meta data. .sp This value my be changed dynamically except that it cannot be set back to 0 -for 3/4 of the ARC; it must be set to an explicit value. +for a specific percent of the ARC; it must be set to an explicit value. .sp Default value: \fB0\fR. .RE +.sp +.ne 2 +.na +\fBzfs_arc_meta_limit_percent\fR (ulong) +.ad +.RS 12n +Percentage of ARC buffers that can be used for meta data. + +See also \fBzfs_arc_meta_limit\fR which serves a similar purpose but has a +higher priority if set to a nonzero value. + +.sp +Default value: \fB75\fR. +.RE + .sp .ne 2 .na diff --git a/module/zfs/arc.c b/module/zfs/arc.c old mode 100644 new mode 100755 index cc26b2e48bb0..82b16ff52e6a --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -239,6 +239,17 @@ int zfs_arc_p_min_shift = 0; int zfs_disable_dup_eviction = 0; int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ +/* + * ARC will evict meta buffers that exceed arc_meta_limit. This + * tunable makes arc_meta_limit adjustable for different workloads. + */ +unsigned long zfs_arc_meta_limit_percent = 75; + +/* + * Percentage of ARC meta buffers that can be consumed by dnodes. 
+ */ +unsigned long zfs_arc_dnode_limit_percent = 10; + /* * These tunables are Linux specific */ @@ -5357,6 +5368,7 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj) static void arc_tuning_update(void) { + uint64_t percent; /* Valid range: 64M - */ if ((zfs_arc_max) && (zfs_arc_max != arc_c_max) && (zfs_arc_max > 64 << 20) && (zfs_arc_max < ptob(physmem)) && @@ -5364,8 +5376,11 @@ arc_tuning_update(void) arc_c_max = zfs_arc_max; arc_c = arc_c_max; arc_p = (arc_c >> 1); - arc_meta_limit = (3 * arc_c_max) / 4; - arc_dnode_limit = arc_meta_limit / 10; + /* Valid range of arc_meta_limit: arc_meta_min - arc_c_max */ + percent = MIN(zfs_arc_meta_limit_percent, 100); + arc_meta_limit = MAX(arc_meta_min, (percent * arc_c_max) / 100); + percent = MIN(zfs_arc_dnode_limit_percent, 100); + arc_dnode_limit = (percent * arc_meta_limit) / 100; } /* Valid range: 32M - */ @@ -5437,6 +5452,7 @@ arc_init(void) #else uint64_t allmem = (physmem * PAGESIZE) / 2; #endif + uint64_t percent; mutex_init(&arc_reclaim_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&arc_reclaim_thread_cv, NULL, CV_DEFAULT, NULL); @@ -5493,10 +5509,14 @@ arc_init(void) arc_meta_min = 1ULL << SPA_MAXBLOCKSHIFT; /* Initialize maximum observed usage to zero */ arc_meta_max = 0; - /* Set limit to 3/4 of arc_c_max with a floor of arc_meta_min */ - arc_meta_limit = MAX((3 * arc_c_max) / 4, arc_meta_min); - /* Default dnode limit is 10% of overall meta limit */ - arc_dnode_limit = arc_meta_limit / 10; + /* + * Set arc_meta_limit to a percent of arc_c_max with a floor of + * arc_meta_min, and a ceiling of arc_c_max. 
+ */ + percent = MIN(zfs_arc_meta_limit_percent, 100); + arc_meta_limit = MAX(arc_meta_min, (percent * arc_c_max) / 100); + percent = MIN(zfs_arc_dnode_limit_percent, 100); + arc_dnode_limit = (percent * arc_meta_limit) / 100; /* Apply user specified tunings */ arc_tuning_update(); @@ -7169,6 +7189,10 @@ MODULE_PARM_DESC(zfs_arc_max, "Max arc size"); module_param(zfs_arc_meta_limit, ulong, 0644); MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size"); +module_param(zfs_arc_meta_limit_percent, ulong, 0644); +MODULE_PARM_DESC(zfs_arc_meta_limit_percent, + "Percent of arc size for arc meta limit"); + module_param(zfs_arc_meta_min, ulong, 0644); MODULE_PARM_DESC(zfs_arc_meta_min, "Min arc metadata"); @@ -7253,6 +7277,10 @@ MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes"); module_param(zfs_arc_dnode_limit, ulong, 0644); MODULE_PARM_DESC(zfs_arc_dnode_limit, "Minimum bytes of dnodes in arc"); +module_param(zfs_arc_dnode_limit_percent, ulong, 0644); +MODULE_PARM_DESC(zfs_arc_dnode_limit_percent, + "Percent of ARC meta buffers for dnodes"); + module_param(zfs_arc_dnode_reduce_percent, ulong, 0644); MODULE_PARM_DESC(zfs_arc_dnode_reduce_percent, "Percentage of excess dnodes to try to unpin");