Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revise ARC shrinker algorithm #10600

Merged
merged 16 commits into from
Aug 1, 2020
4 changes: 2 additions & 2 deletions include/os/linux/spl/sys/shrinker.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ __ ## varname ## _wrapper(struct shrinker *shrink, struct shrink_control *sc)\
\
static struct shrinker varname = { \
.shrink = __ ## varname ## _wrapper, \
.seeks = seek_cost \
.seeks = seek_cost, \
}

#define SHRINK_STOP (-1)
Expand All @@ -97,7 +97,7 @@ static struct shrinker varname = { \
static struct shrinker varname = { \
.count_objects = countfunc, \
.scan_objects = scanfunc, \
.seeks = seek_cost \
.seeks = seek_cost, \
}

#else
Expand Down
36 changes: 36 additions & 0 deletions include/os/linux/zfs/sys/trace_arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,41 @@ DEFINE_EVENT(zfs_l2arc_evict_class, name, \
/* END CSTYLED */
DEFINE_L2ARC_EVICT_EVENT(zfs_l2arc__evict);

/*
* Generic support for three argument tracepoints of the form:
*
* DTRACE_PROBE3(...,
* uint64_t, ...,
* uint64_t, ...,
* uint64_t, ...);
*/
/* BEGIN CSTYLED */
/*
 * Trace event class carrying three uint64_t values: amount,
 * arc_evict_count and aew_count.  It is the class instantiated by
 * DEFINE_ARC_WAIT_FOR_EVICTION_EVENT().
 */
DECLARE_EVENT_CLASS(zfs_arc_wait_for_eviction_class,
TP_PROTO(uint64_t amount, uint64_t arc_evict_count, uint64_t aew_count),
TP_ARGS(amount, arc_evict_count, aew_count),
/* One record field per probe argument. */
TP_STRUCT__entry(
__field(uint64_t, amount)
__field(uint64_t, arc_evict_count)
__field(uint64_t, aew_count)
),
/* Copy each probe argument into the record field of the same name. */
TP_fast_assign(
__entry->amount = amount;
__entry->arc_evict_count = arc_evict_count;
__entry->aew_count = aew_count;
),
/* All three fields are printed as unsigned decimal values. */
TP_printk("amount %llu arc_evict_count %llu aew_count %llu",
__entry->amount, __entry->arc_evict_count, __entry->aew_count)
);
/* END CSTYLED */

/* BEGIN CSTYLED */
/*
 * Instantiate a tracepoint called `name` from
 * zfs_arc_wait_for_eviction_class.
 *
 * Every line of the definition except the last ends in a backslash and
 * the DEFINE_EVENT() invocation is closed on the final line, so the
 * whole invocation belongs to the macro body rather than spilling into
 * file scope.
 */
#define DEFINE_ARC_WAIT_FOR_EVICTION_EVENT(name) \
DEFINE_EVENT(zfs_arc_wait_for_eviction_class, name, \
	TP_PROTO(uint64_t amount, uint64_t arc_evict_count, uint64_t aew_count), \
	TP_ARGS(amount, arc_evict_count, aew_count))
/* END CSTYLED */
DEFINE_ARC_WAIT_FOR_EVICTION_EVENT(zfs_arc__wait__for__eviction);

#endif /* _TRACE_ARC_H */

#undef TRACE_INCLUDE_PATH
Expand All @@ -376,6 +411,7 @@ DEFINE_DTRACE_PROBE1(l2arc__miss);
DEFINE_DTRACE_PROBE2(l2arc__read);
DEFINE_DTRACE_PROBE2(l2arc__write);
DEFINE_DTRACE_PROBE2(l2arc__iodone);
DEFINE_DTRACE_PROBE3(arc__wait__for__eviction);
DEFINE_DTRACE_PROBE4(arc__miss);
DEFINE_DTRACE_PROBE4(l2arc__evict);

Expand Down
20 changes: 6 additions & 14 deletions include/sys/arc_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -846,15 +846,11 @@ typedef struct arc_stats {
kstat_named_t arcstat_cached_only_in_progress;
} arc_stats_t;

typedef enum free_memory_reason_t {
FMR_UNKNOWN,
FMR_NEEDFREE,
FMR_LOTSFREE,
FMR_SWAPFS_MINFREE,
FMR_PAGES_PP_MAXIMUM,
FMR_HEAP_ARENA,
FMR_ZIO_ARENA,
} free_memory_reason_t;
/*
 * Record made up of a list linkage node, a condition variable and a
 * 64-bit count.
 *
 * NOTE(review): presumably one of these is queued for each thread that
 * blocks in arc_wait_for_eviction(), with aew_count holding the eviction
 * count at which that thread may be woken -- confirm against the ARC
 * implementation.
 */
typedef struct arc_evict_waiter {
list_node_t aew_node;	/* list linkage */
kcondvar_t aew_cv;	/* condition variable for this waiter */
uint64_t aew_count;	/* NOTE(review): likely the wake-up threshold */
} arc_evict_waiter_t;

#define ARCSTAT(stat) (arc_stats.stat.value.ui64)

Expand All @@ -870,7 +866,6 @@ typedef enum free_memory_reason_t {
#define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */
#define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */
#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */
#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */

extern taskq_t *arc_prune_taskq;
extern arc_stats_t arc_stats;
Expand All @@ -879,10 +874,6 @@ extern boolean_t arc_warm;
extern int arc_grow_retry;
extern int arc_no_grow_shift;
extern int arc_shrink_shift;
extern zthr_t *arc_evict_zthr;
extern kmutex_t arc_evict_lock;
extern kcondvar_t arc_evict_waiters_cv;
extern boolean_t arc_evict_needed;
extern kmutex_t arc_prune_mtx;
extern list_t arc_prune_list;
extern aggsum_t arc_size;
Expand All @@ -897,6 +888,7 @@ extern void arc_reduce_target_size(int64_t to_free);
extern boolean_t arc_reclaim_needed(void);
extern void arc_kmem_reap_soon(void);
extern boolean_t arc_is_overflowing(void);
extern void arc_wait_for_eviction(uint64_t);

extern void arc_lowmem_init(void);
extern void arc_lowmem_fini(void);
Expand Down
40 changes: 40 additions & 0 deletions man/man5/zfs-module-parameters.5
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,23 @@ increased to reduce the memory footprint.
Default value: \fB8192\fR.
.RE

.sp
.ne 2
.na
\fBzfs_arc_eviction_pct\fR (int)
.ad
.RS 12n
When \fBarc_is_overflowing()\fR returns true, \fBarc_get_data_impl()\fR waits
for this percentage of the requested amount of data to be evicted. For example, by
default for every 2KB that's evicted, 1KB of it may be "reused" by a new
allocation. Since this is above 100%, it ensures that progress is made
towards getting \fBarc_size\fR under \fBarc_c\fR. Since this is finite, it
ensures that allocations can still happen, even during the potentially long
time that \fBarc_size\fR is more than \fBarc_c\fR.
.sp
Default value: \fB200\fR.
.RE

.sp
.ne 2
.na
Expand Down Expand Up @@ -1148,6 +1165,29 @@ only operates during memory pressure/reclaim.
Default value: \fB0\fR% (disabled).
.RE

.sp
.ne 2
.na
\fBzfs_arc_shrinker_limit\fR (int)
.ad
.RS 12n
This is a limit on how many pages the ARC shrinker makes available for
eviction in response to one page allocation attempt. Note that in
practice, the kernel's shrinker can ask us to evict up to about 4x this
for one allocation attempt.
.sp
The default limit of 10,000 (in practice, 160MB per allocation attempt with
4K pages) limits the amount of time spent attempting to reclaim ARC memory to
less than 100ms per allocation attempt, even with a small average compressed
block size of ~8KB.
.sp
ahrens marked this conversation as resolved.
Show resolved Hide resolved
The parameter can be set to 0 (zero) to disable the limit.
.sp
This parameter only applies on Linux.
.sp
Default value: \fB10000\fR.
.RE

.sp
.ne 2
.na
Expand Down
19 changes: 4 additions & 15 deletions module/os/freebsd/zfs/arc_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,6 @@ extern struct vfsops zfs_vfsops;

uint_t zfs_arc_free_target = 0;

int64_t last_free_memory;
free_memory_reason_t last_free_reason;

static void
arc_free_target_init(void *unused __unused)
{
Expand Down Expand Up @@ -100,7 +97,6 @@ arc_available_memory(void)
{
int64_t lowest = INT64_MAX;
int64_t n __unused;
free_memory_reason_t r = FMR_UNKNOWN;

/*
* Cooperate with pagedaemon when it's time for it to scan
Expand All @@ -109,7 +105,6 @@ arc_available_memory(void)
n = PAGESIZE * ((int64_t)freemem - zfs_arc_free_target);
if (n < lowest) {
lowest = n;
r = FMR_LOTSFREE;
}
#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
/*
Expand All @@ -126,13 +121,10 @@ arc_available_memory(void)
n = uma_avail() - (long)(uma_limit() / 4);
if (n < lowest) {
lowest = n;
r = FMR_HEAP_ARENA;
}
#endif

last_free_memory = lowest;
last_free_reason = r;
DTRACE_PROBE2(arc__available_memory, int64_t, lowest, int, r);
DTRACE_PROBE1(arc__available_memory, int64_t, lowest);
return (lowest);
}

Expand Down Expand Up @@ -223,18 +215,15 @@ arc_lowmem(void *arg __unused, int howto __unused)
DTRACE_PROBE2(arc__needfree, int64_t, free_memory, int64_t, to_free);
arc_reduce_target_size(to_free);

mutex_enter(&arc_evict_lock);
arc_evict_needed = B_TRUE;
zthr_wakeup(arc_evict_zthr);

/*
* It is unsafe to block here in arbitrary threads, because we can come
* here from ARC itself and may hold ARC locks and thus risk a deadlock
* with ARC reclaim thread.
*/
if (curproc == pageproc)
(void) cv_wait(&arc_evict_waiters_cv, &arc_evict_lock);
mutex_exit(&arc_evict_lock);
arc_wait_for_eviction(to_free);
else
arc_wait_for_eviction(0);
}

void
Expand Down
Loading