Merge pull request torvalds#119 from sched-ext/htejun
scx: Fix locking order
Byte-Lab authored Jan 11, 2024
2 parents 7592388 + 1225a90 commit 4361d23
Showing 2 changed files with 76 additions and 42 deletions.
42 changes: 23 additions & 19 deletions include/linux/sched/ext.h
@@ -412,24 +412,6 @@ struct sched_ext_ops {
*/
void (*cpu_release)(s32 cpu, struct scx_cpu_release_args *args);

/**
* cpu_online - A CPU became online
* @cpu: CPU which just came up
*
* @cpu just came online. @cpu doesn't call ops.enqueue() or run tasks
* associated with other CPUs beforehand.
*/
void (*cpu_online)(s32 cpu);

/**
* cpu_offline - A CPU is going offline
* @cpu: CPU which is going offline
*
* @cpu is going offline. @cpu doesn't call ops.enqueue() or run tasks
* associated with other CPUs afterwards.
*/
void (*cpu_offline)(s32 cpu);

/**
* init_task - Initialize a task to run in a BPF scheduler
* @p: task to initialize for BPF scheduling
@@ -550,7 +532,29 @@ struct sched_ext_ops {
#endif /* CONFIG_CGROUPS */

/*
* All online ops must come before ops.init().
* All online ops must come before ops.cpu_online().
*/

/**
* cpu_online - A CPU became online
* @cpu: CPU which just came up
*
* @cpu just came online. @cpu doesn't call ops.enqueue() or run tasks
* associated with other CPUs beforehand.
*/
void (*cpu_online)(s32 cpu);

/**
* cpu_offline - A CPU is going offline
* @cpu: CPU which is going offline
*
* @cpu is going offline. @cpu doesn't call ops.enqueue() or run tasks
* associated with other CPUs afterwards.
*/
void (*cpu_offline)(s32 cpu);

/*
* All CPU hotplug ops must come before ops.init().
*/

/**
76 changes: 53 additions & 23 deletions kernel/sched/ext.c
@@ -9,10 +9,15 @@
#define SCX_OP_IDX(op) (offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))

enum scx_internal_consts {
SCX_NR_ONLINE_OPS = SCX_OP_IDX(init),
SCX_DSP_DFL_MAX_BATCH = 32,
SCX_DSP_MAX_LOOPS = 32,
SCX_WATCHDOG_MAX_TIMEOUT = 30 * HZ,
SCX_OPI_BEGIN = 0,
SCX_OPI_NORMAL_BEGIN = 0,
SCX_OPI_NORMAL_END = SCX_OP_IDX(cpu_online),
SCX_OPI_CPU_HOTPLUG_BEGIN = SCX_OP_IDX(cpu_online),
SCX_OPI_CPU_HOTPLUG_END = SCX_OP_IDX(init),
SCX_OPI_END = SCX_OP_IDX(init),
SCX_DSP_DFL_MAX_BATCH = 32,
SCX_DSP_MAX_LOOPS = 32,
SCX_WATCHDOG_MAX_TIMEOUT = 30 * HZ,
};

enum scx_ops_enable_state {
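
The SCX_OPI_* constants carve the ops table into half-open index ranges by exploiting the fact that struct sched_ext_ops is a dense sequence of same-sized function pointers, so SCX_OP_IDX() can turn a member's offset into its slot number. A minimal self-contained sketch of the idea, using a hypothetical toy struct in place of the real one:

#include <assert.h>
#include <stddef.h>

/* Toy stand-in for struct sched_ext_ops: uniform function pointers,
 * declared in the order the index ranges depend on. */
struct toy_ops {
	void (*enqueue)(void);		/* "normal" ops come first ... */
	void (*dispatch)(void);
	void (*cpu_online)(void);	/* ... then the CPU hotplug ops ... */
	void (*cpu_offline)(void);
	void (*init)(void);		/* ... and init() marks the end. */
};

#define TOY_OP_IDX(op) (offsetof(struct toy_ops, op) / sizeof(void (*)(void)))

enum {
	TOY_OPI_NORMAL_BEGIN      = 0,
	TOY_OPI_NORMAL_END        = TOY_OP_IDX(cpu_online),
	TOY_OPI_CPU_HOTPLUG_BEGIN = TOY_OP_IDX(cpu_online),
	TOY_OPI_CPU_HOTPLUG_END   = TOY_OP_IDX(init),
	TOY_OPI_END               = TOY_OP_IDX(init),
};

/* The ranges only mean anything if the members are declared in this
 * order, which is why the header hunk above moves cpu_online and
 * cpu_offline to just before init(). */
static_assert(TOY_OPI_NORMAL_END == TOY_OPI_CPU_HOTPLUG_BEGIN, "ranges must abut");
static_assert(TOY_OPI_CPU_HOTPLUG_END == TOY_OPI_END, "hotplug ops end at init()");
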
@@ -104,8 +109,8 @@ static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_exiting);
DEFINE_STATIC_KEY_FALSE(scx_ops_cpu_preempt);
static DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_enabled);

struct static_key_false scx_has_op[SCX_NR_ONLINE_OPS] =
{ [0 ... SCX_NR_ONLINE_OPS-1] = STATIC_KEY_FALSE_INIT };
struct static_key_false scx_has_op[SCX_OPI_END] =
{ [0 ... SCX_OPI_END-1] = STATIC_KEY_FALSE_INIT };

static atomic_t scx_exit_kind = ATOMIC_INIT(SCX_EXIT_DONE);
static struct scx_exit_info scx_exit_info;
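
Each slot of scx_has_op[] is a static key guarding the corresponding op, so call sites pay a patched branch rather than a pointer test when an op is absent; sizing the array by SCX_OPI_END now covers the hotplug ops as well. Call sites consume the keys roughly as sketched below; SCX_HAS_OP() matches ext.c's real helper, but the kf-flag and argument shape of the illustrative call are assumptions:

#define SCX_HAS_OP(op)	static_branch_likely(&scx_has_op[SCX_OP_IDX(op)])

	/* Illustrative call site (assumed shape): skip the indirect
	 * call entirely unless the loaded scheduler implements the op. */
	if (SCX_HAS_OP(cpu_online))
		SCX_CALL_OP(SCX_KF_REST, cpu_online, cpu);
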
@@ -3196,9 +3201,12 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
static_branch_disable(&__scx_switched_all);
WRITE_ONCE(scx_switching_all, false);

/* avoid racing against fork and cgroup changes */
cpus_read_lock();
/*
* Avoid racing against fork and cgroup changes. See scx_ops_enable()
* for explanation on the locking order.
*/
percpu_down_write(&scx_fork_rwsem);
cpus_read_lock();
scx_cgroup_lock();

spin_lock_irq(&scx_tasks_lock);
@@ -3228,7 +3236,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)

/* no task is on scx, turn off all the switches and flush in-progress calls */
static_branch_disable_cpuslocked(&__scx_ops_enabled);
for (i = 0; i < SCX_NR_ONLINE_OPS; i++)
for (i = SCX_OPI_BEGIN; i < SCX_OPI_END; i++)
static_branch_disable_cpuslocked(&scx_has_op[i]);
static_branch_disable_cpuslocked(&scx_ops_enq_last);
static_branch_disable_cpuslocked(&scx_ops_enq_exiting);
@@ -3239,8 +3247,8 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
scx_cgroup_exit();

scx_cgroup_unlock();
percpu_up_write(&scx_fork_rwsem);
cpus_read_unlock();
percpu_up_write(&scx_fork_rwsem);

if (ei->kind >= SCX_EXIT_ERROR) {
printk(KERN_ERR "sched_ext: BPF scheduler \"%s\" errored, disabling\n", scx_ops.name);
@@ -3373,13 +3381,13 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
scx_create_rt_helper("sched_ext_ops_helper"));
if (!scx_ops_helper) {
ret = -ENOMEM;
goto err_unlock;
goto err;
}
}

if (scx_ops_enable_state() != SCX_OPS_DISABLED) {
ret = -EBUSY;
goto err_unlock;
goto err;
}

/*
@@ -3408,7 +3416,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
ret = SCX_CALL_OP_RET(SCX_KF_INIT, init);
if (ret) {
ret = ops_sanitize_err("init", ret);
goto err_disable;
goto err_disable_unlock_cpus;
}

/*
@@ -3420,9 +3428,15 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
* ops.exit() like other scx_bpf_error() invocations.
*/
if (atomic_read(&scx_exit_kind) != SCX_EXIT_NONE)
goto err_disable;
goto err_disable_unlock_cpus;
}

for (i = SCX_OPI_CPU_HOTPLUG_BEGIN; i < SCX_OPI_CPU_HOTPLUG_END; i++)
if (((void (**)(void))ops)[i])
static_branch_enable_cpuslocked(&scx_has_op[i]);

cpus_read_unlock();

ret = validate_ops(ops);
if (ret)
goto err_disable;
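
The op-presence test above, ((void (**)(void))ops)[i], reinterprets the ops struct as a flat array of generic function pointers and indexes it with the SCX_OPI_* ranges. A self-contained illustration of that pattern with toy names (it leans on every member being a function pointer, exactly what the struct layout guarantees; the cast is a kernel idiom rather than strictly portable ISO C):

#include <stdio.h>

struct toy_ops {
	void (*enqueue)(void);
	void (*cpu_online)(void);
	void (*init)(void);
};

static void say_hi(void) { puts("cpu_online is set"); }

int main(void)
{
	struct toy_ops ops = { .cpu_online = say_hi };
	void (**vtbl)(void) = (void (**)(void))&ops;

	/* Walk a slot range and act only on the ops the user filled in,
	 * as scx_ops_enable() does when flipping one static key per
	 * present op. */
	for (int i = 0; i < 3; i++)
		if (vtbl[i])
			vtbl[i]();	/* fires once, for cpu_online */
	return 0;
}
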
@@ -3449,11 +3463,26 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
/*
* Lock out forks, cgroup on/offlining and moves before opening the
* floodgate so that they don't wander into the operations prematurely.
*
* We don't need to keep the CPUs stable but static_branch_*() requires
* cpus_read_lock() and scx_cgroup_rwsem must nest inside
* cpu_hotplug_lock because of the following dependency chain:
*
* cpu_hotplug_lock --> cgroup_threadgroup_rwsem --> scx_cgroup_rwsem
*
* So, we need to do cpus_read_lock() before scx_cgroup_lock() and use
* static_branch_*_cpuslocked().
*
* Note that cpu_hotplug_lock must nest inside scx_fork_rwsem due to the
* following dependency chain:
*
* scx_fork_rwsem --> pernet_ops_rwsem --> cpu_hotplug_lock
*/
percpu_down_write(&scx_fork_rwsem);
cpus_read_lock();
scx_cgroup_lock();

for (i = 0; i < SCX_NR_ONLINE_OPS; i++)
for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++)
if (((void (**)(void))ops)[i])
static_branch_enable_cpuslocked(&scx_has_op[i]);
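
This comment is the heart of the fix: the two dependency chains pin a single global order, scx_fork_rwsem first, then cpu_hotplug_lock, then scx_cgroup_rwsem, and both the enable path and scx_ops_disable_workfn() now acquire in that order. A self-contained pthread analogy of the rule, with toy locks standing in for the kernel primitives:

#include <pthread.h>

static pthread_rwlock_t fork_rwsem   = PTHREAD_RWLOCK_INITIALIZER; /* ~ scx_fork_rwsem */
static pthread_rwlock_t hotplug_lock = PTHREAD_RWLOCK_INITIALIZER; /* ~ cpu_hotplug_lock */
static pthread_rwlock_t cgroup_rwsem = PTHREAD_RWLOCK_INITIALIZER; /* ~ scx_cgroup_rwsem */

static void locked_section(void)
{
	/* Every path takes the three locks in the same global order;
	 * consistent acquisition order is what rules out ABBA deadlocks. */
	pthread_rwlock_wrlock(&fork_rwsem);	/* outermost */
	pthread_rwlock_rdlock(&hotplug_lock);	/* read side, like cpus_read_lock() */
	pthread_rwlock_wrlock(&cgroup_rwsem);	/* innermost */

	/* ... flip static branches, walk tasks, init cgroups ... */

	/* Release in reverse order, by convention and matching the diff. */
	pthread_rwlock_unlock(&cgroup_rwsem);
	pthread_rwlock_unlock(&hotplug_lock);
	pthread_rwlock_unlock(&fork_rwsem);
}

int main(void) { locked_section(); return 0; }
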

@@ -3478,7 +3507,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
*/
ret = scx_cgroup_init();
if (ret)
goto err_disable_unlock;
goto err_disable_unlock_all;

static_branch_enable_cpuslocked(&__scx_ops_enabled);

@@ -3504,7 +3533,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
spin_unlock_irq(&scx_tasks_lock);
pr_err("sched_ext: ops.init_task() failed (%d) for %s[%d] while loading\n",
ret, p->comm, p->pid);
goto err_disable_unlock;
goto err_disable_unlock_all;
}

put_task_struct(p);
@@ -3528,7 +3557,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
preempt_enable();
spin_unlock_irq(&scx_tasks_lock);
ret = -EBUSY;
goto err_disable_unlock;
goto err_disable_unlock_all;
}

/*
@@ -3563,6 +3592,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
spin_unlock_irq(&scx_tasks_lock);
preempt_enable();
scx_cgroup_unlock();
cpus_read_unlock();
percpu_up_write(&scx_fork_rwsem);

if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
@@ -3571,24 +3601,24 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
}

if (scx_switch_all_req)
static_branch_enable_cpuslocked(&__scx_switched_all);
static_branch_enable(&__scx_switched_all);

cpus_read_unlock();
mutex_unlock(&scx_ops_enable_mutex);

scx_cgroup_config_knobs();

return 0;

err_unlock:
err:
mutex_unlock(&scx_ops_enable_mutex);
return ret;

err_disable_unlock:
err_disable_unlock_all:
scx_cgroup_unlock();
percpu_up_write(&scx_fork_rwsem);
err_disable:
err_disable_unlock_cpus:
cpus_read_unlock();
err_disable:
mutex_unlock(&scx_ops_enable_mutex);
/* must be fully disabled before returning */
scx_ops_disable(SCX_EXIT_ERROR);
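
The relabeled exits form a classic goto unwind ladder: the later a failure happens, the higher the jump target, and each label releases only what was still held at that point before falling through to the labels below. A toy sketch of the idiom, with hypothetical stand-in steps rather than the real locks:

#include <stdio.h>

static int step(const char *what, int ok)
{
	printf("%-20s %s\n", what, ok ? "ok" : "FAILED");
	return ok;
}

static int toy_enable(int fail_at)
{
	step("lock enable mutex", 1);
	if (!step("create helper", fail_at != 1))
		goto err;			/* only the mutex is held */

	step("cpus_read_lock", 1);
	if (!step("ops.init()", fail_at != 2))
		goto err_unlock_cpus;		/* mutex + CPUs held */

	step("fork/cgroup lock", 1);
	if (!step("init all tasks", fail_at != 3))
		goto err_unlock_all;		/* everything held */

	step("unlock in reverse", 1);
	return 0;

err_unlock_all:
	step("fork/cgroup unlock", 1);
err_unlock_cpus:	/* fall through releases the rest, innermost out */
	step("cpus_read_unlock", 1);
err:
	step("unlock enable mutex", 1);
	return -1;
}

int main(void)
{
	toy_enable(3);	/* exercise the deepest unwind path */
	return 0;
}
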
