Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement memory and CPU hotplug #11212

Merged
merged 12 commits into from
Dec 10, 2020
26 changes: 26 additions & 0 deletions config/kernel-hotplug.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
dnl #
dnl # 4.6 API change
dnl # Added CPU hotplug APIs
dnl #
dnl # ZFS_AC_KERNEL_SRC_CPU_HOTPLUG registers the "cpu_hotplug" compile test.
dnl # The test source includes <linux/cpuhotplug.h> and references
dnl # enum cpuhp_state / CPUHP_ONLINE together with the multi-instance
dnl # interfaces cpuhp_setup_state_multi(), cpuhp_remove_multi_state(),
dnl # cpuhp_state_add_instance_nocalls() and
dnl # cpuhp_state_remove_instance_nocalls().  The callback pointer type
dnl # used is int (*)(unsigned int, struct hlist_node *).
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_CPU_HOTPLUG], [
ZFS_LINUX_TEST_SRC([cpu_hotplug], [
#include <linux/cpuhotplug.h>
],[
enum cpuhp_state state = CPUHP_ONLINE;
int (*fp)(unsigned int, struct hlist_node *) = NULL;
cpuhp_state_add_instance_nocalls(0, (struct hlist_node *)NULL);
cpuhp_state_remove_instance_nocalls(0, (struct hlist_node *)NULL);
cpuhp_setup_state_multi(state, "", fp, fp);
cpuhp_remove_multi_state(0);
])
])

dnl #
dnl # ZFS_AC_KERNEL_CPU_HOTPLUG reports the outcome of the "cpu_hotplug"
dnl # compile test.  When the test passed it prints "yes" and defines
dnl # HAVE_CPU_HOTPLUG to 1; otherwise it prints "no" and defines nothing.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_CPU_HOTPLUG], [
AC_MSG_CHECKING([whether CPU hotplug APIs exist])
ZFS_LINUX_TEST_RESULT([cpu_hotplug], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_CPU_HOTPLUG, 1, [yes])
],[
AC_MSG_RESULT(no)
])
])
2 changes: 2 additions & 0 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES
ZFS_AC_KERNEL_SRC_KSTRTOUL
ZFS_AC_KERNEL_SRC_PERCPU
ZFS_AC_KERNEL_SRC_CPU_HOTPLUG

AC_MSG_CHECKING([for available kernel interfaces])
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
Expand Down Expand Up @@ -221,6 +222,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_TOTALHIGH_PAGES
ZFS_AC_KERNEL_KSTRTOUL
ZFS_AC_KERNEL_PERCPU
ZFS_AC_KERNEL_CPU_HOTPLUG
])

dnl #
Expand Down
5 changes: 5 additions & 0 deletions include/os/linux/spl/sys/taskq.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ typedef struct taskq {
int tq_nthreads; /* # of existing threads */
int tq_nspawn; /* # of threads being spawned */
int tq_maxthreads; /* # of threads maximum */
/* If PERCPU flag is set, percent of NCPUs to have as threads */
int tq_cpu_pct;
int tq_pri; /* priority */
int tq_minalloc; /* min taskq_ent_t pool size */
int tq_maxalloc; /* max taskq_ent_t pool size */
Expand All @@ -99,6 +101,9 @@ typedef struct taskq {
spl_wait_queue_head_t tq_work_waitq; /* new work waitq */
spl_wait_queue_head_t tq_wait_waitq; /* wait waitq */
tq_lock_role_t tq_lock_class; /* class when taking tq_lock */
/* list node for the cpu hotplug callback */
struct hlist_node tq_hp_cb_node;
boolean_t tq_hp_support;
} taskq_t;

typedef struct taskq_ent {
Expand Down
1 change: 1 addition & 0 deletions include/sys/arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg);
uint64_t arc_all_memory(void);
uint64_t arc_default_max(uint64_t min, uint64_t allmem);
uint64_t arc_target_bytes(void);
void arc_set_limits(uint64_t);
void arc_init(void);
void arc_fini(void);

Expand Down
2 changes: 2 additions & 0 deletions include/sys/arc_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -926,6 +926,8 @@ extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
extern uint64_t arc_free_memory(void);
extern int64_t arc_available_memory(void);
extern void arc_tuning_update(boolean_t);
extern void arc_register_hotplug(void);
extern void arc_unregister_hotplug(void);

extern int param_set_arc_long(ZFS_MODULE_PARAM_ARGS);
extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);
Expand Down
10 changes: 10 additions & 0 deletions module/os/freebsd/zfs/arc_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,13 @@ arc_lowmem_fini(void)
if (arc_event_lowmem != NULL)
EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
}

/*
 * ARC hotplug registration hook.  This FreeBSD implementation has an
 * empty body, so calling it has no effect.
 */
void
arc_register_hotplug(void)
{
}

/*
 * ARC hotplug unregistration hook.  This FreeBSD implementation has an
 * empty body, so calling it has no effect.
 */
void
arc_unregister_hotplug(void)
{
}
128 changes: 124 additions & 4 deletions module/os/linux/spl/spl-taskq.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
#include <sys/kmem.h>
#include <sys/tsd.h>
#include <sys/trace_spl.h>
#ifdef HAVE_CPU_HOTPLUG
#include <linux/cpuhotplug.h>
pcd1193182 marked this conversation as resolved.
Show resolved Hide resolved
#endif

int spl_taskq_thread_bind = 0;
module_param(spl_taskq_thread_bind, int, 0644);
Expand Down Expand Up @@ -59,6 +62,11 @@ EXPORT_SYMBOL(system_delay_taskq);
static taskq_t *dynamic_taskq;
static taskq_thread_t *taskq_thread_create(taskq_t *);

#ifdef HAVE_CPU_HOTPLUG
/* Multi-callback id for cpu hotplugging. */
static int spl_taskq_cpuhp_state;
#endif

/* List of all taskqs */
LIST_HEAD(tq_list);
struct rw_semaphore tq_list_sem;
Expand Down Expand Up @@ -1024,13 +1032,14 @@ taskq_thread_create(taskq_t *tq)
}

taskq_t *
taskq_create(const char *name, int nthreads, pri_t pri,
taskq_create(const char *name, int threads_arg, pri_t pri,
int minalloc, int maxalloc, uint_t flags)
{
taskq_t *tq;
taskq_thread_t *tqt;
int count = 0, rc = 0, i;
unsigned long irqflags;
int nthreads = threads_arg;

ASSERT(name != NULL);
ASSERT(minalloc >= 0);
Expand All @@ -1041,15 +1050,27 @@ taskq_create(const char *name, int nthreads, pri_t pri,
if (flags & TASKQ_THREADS_CPU_PCT) {
ASSERT(nthreads <= 100);
ASSERT(nthreads >= 0);
nthreads = MIN(nthreads, 100);
nthreads = MIN(threads_arg, 100);
nthreads = MAX(nthreads, 0);
nthreads = MAX((num_online_cpus() * nthreads) / 100, 1);
nthreads = MAX((num_online_cpus() * nthreads) /100, 1);
}

tq = kmem_alloc(sizeof (*tq), KM_PUSHPAGE);
if (tq == NULL)
return (NULL);

tq->tq_hp_support = B_FALSE;
#ifdef HAVE_CPU_HOTPLUG
if (flags & TASKQ_THREADS_CPU_PCT) {
tq->tq_hp_support = B_TRUE;
if (cpuhp_state_add_instance_nocalls(spl_taskq_cpuhp_state,
&tq->tq_hp_cb_node) != 0) {
kmem_free(tq, sizeof (*tq));
return (NULL);
}
}
#endif

spin_lock_init(&tq->tq_lock);
INIT_LIST_HEAD(&tq->tq_thread_list);
INIT_LIST_HEAD(&tq->tq_active_list);
Expand All @@ -1058,6 +1079,7 @@ taskq_create(const char *name, int nthreads, pri_t pri,
tq->tq_nthreads = 0;
tq->tq_nspawn = 0;
tq->tq_maxthreads = nthreads;
tq->tq_cpu_pct = threads_arg;
tq->tq_pri = pri;
tq->tq_minalloc = minalloc;
tq->tq_maxalloc = maxalloc;
Expand Down Expand Up @@ -1131,6 +1153,12 @@ taskq_destroy(taskq_t *tq)
tq->tq_flags &= ~TASKQ_ACTIVE;
spin_unlock_irqrestore(&tq->tq_lock, flags);

#ifdef HAVE_CPU_HOTPLUG
if (tq->tq_hp_support) {
VERIFY0(cpuhp_state_remove_instance_nocalls(
spl_taskq_cpuhp_state, &tq->tq_hp_cb_node));
}
#endif
/*
* When TASKQ_ACTIVE is clear new tasks may not be added nor may
* new worker threads be spawned for dynamic taskq.
Expand Down Expand Up @@ -1198,7 +1226,6 @@ taskq_destroy(taskq_t *tq)
}
EXPORT_SYMBOL(taskq_destroy);


static unsigned int spl_taskq_kick = 0;

/*
Expand Down Expand Up @@ -1255,12 +1282,94 @@ module_param_call(spl_taskq_kick, param_set_taskq_kick, param_get_uint,
MODULE_PARM_DESC(spl_taskq_kick,
"Write nonzero to kick stuck taskqs to spawn more threads");

#ifdef HAVE_CPU_HOTPLUG
/*
* This callback will be called exactly once for each core that comes online,
* for each dynamic taskq. We attempt to expand taskqs that have
* TASKQ_THREADS_CPU_PCT set. We need to redo the percentage calculation every
* time, to correctly determine whether or not to add a thread.
*/
static int
spl_taskq_expand(unsigned int cpu, struct hlist_node *node)
pcd1193182 marked this conversation as resolved.
Show resolved Hide resolved
{
taskq_t *tq = list_entry(node, taskq_t, tq_hp_cb_node);
unsigned long flags;
int err = 0;

ASSERT(tq);
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);

if (!(tq->tq_flags & TASKQ_ACTIVE))
goto out;

ASSERT(tq->tq_flags & TASKQ_THREADS_CPU_PCT);
int nthreads = MIN(tq->tq_cpu_pct, 100);
nthreads = MAX(((num_online_cpus() + 1) * nthreads) / 100, 1);
tq->tq_maxthreads = nthreads;

if (!((tq->tq_flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic) &&
tq->tq_maxthreads > tq->tq_nthreads) {
taskq_thread_t *tqt = taskq_thread_create(tq);
pcd1193182 marked this conversation as resolved.
Show resolved Hide resolved
pcd1193182 marked this conversation as resolved.
Show resolved Hide resolved
if (tqt == NULL)
err = -1;
}

out:
spin_unlock_irqrestore(&tq->tq_lock, flags);
return (err);
}

/*
* While we don't support offlining CPUs, it is possible that CPUs will fail
* to online successfully. We do need to be able to handle this case
* gracefully.
*/
static int
spl_taskq_prepare_down(unsigned int cpu, struct hlist_node *node)
{
taskq_t *tq = list_entry(node, taskq_t, tq_hp_cb_node);
unsigned long flags;

ASSERT(tq);
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);

if (!(tq->tq_flags & TASKQ_ACTIVE))
goto out;

ASSERT(tq->tq_flags & TASKQ_THREADS_CPU_PCT);
int nthreads = MIN(tq->tq_cpu_pct, 100);
nthreads = MAX(((num_online_cpus()) * nthreads) / 100, 1);
tq->tq_maxthreads = nthreads;

if (!((tq->tq_flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic) &&
tq->tq_maxthreads < tq->tq_nthreads) {
taskq_thread_t *tqt = list_entry(tq->tq_thread_list.next,
taskq_thread_t, tqt_thread_list);
struct task_struct *thread = tqt->tqt_thread;
pcd1193182 marked this conversation as resolved.
Show resolved Hide resolved
spin_unlock_irqrestore(&tq->tq_lock, flags);

kthread_stop(thread);
pcd1193182 marked this conversation as resolved.
Show resolved Hide resolved

return (0);
}

out:
spin_unlock_irqrestore(&tq->tq_lock, flags);
return (0);
}
#endif

int
spl_taskq_init(void)
{
init_rwsem(&tq_list_sem);
tsd_create(&taskq_tsd, NULL);

#ifdef HAVE_CPU_HOTPLUG
spl_taskq_cpuhp_state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
"fs/spl_taskq:online", spl_taskq_expand, spl_taskq_prepare_down);
#endif

pcd1193182 marked this conversation as resolved.
Show resolved Hide resolved
system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64),
maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
if (system_taskq == NULL)
Expand All @@ -1269,13 +1378,19 @@ spl_taskq_init(void)
system_delay_taskq = taskq_create("spl_delay_taskq", MAX(boot_ncpus, 4),
maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
if (system_delay_taskq == NULL) {
#ifdef HAVE_CPU_HOTPLUG
cpuhp_remove_multi_state(spl_taskq_cpuhp_state);
#endif
taskq_destroy(system_taskq);
return (1);
}

dynamic_taskq = taskq_create("spl_dynamic_taskq", 1,
maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE);
if (dynamic_taskq == NULL) {
#ifdef HAVE_CPU_HOTPLUG
cpuhp_remove_multi_state(spl_taskq_cpuhp_state);
#endif
taskq_destroy(system_taskq);
taskq_destroy(system_delay_taskq);
return (1);
Expand Down Expand Up @@ -1304,4 +1419,9 @@ spl_taskq_fini(void)
system_taskq = NULL;

tsd_destroy(&taskq_tsd);

#ifdef HAVE_CPU_HOTPLUG
cpuhp_remove_multi_state(spl_taskq_cpuhp_state);
spl_taskq_cpuhp_state = 0;
#endif
}
Loading