diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index dd113d4a95ada..793efa68fe1de 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -51,13 +51,15 @@ enum scx_dsq_id_flags { }; /* - * Dispatch queue (dsq) is a simple FIFO which is used to buffer between the - * scheduler core and the BPF scheduler. See the documentation for more details. + * A dispatch queue (DSQ) can be either a FIFO or p->scx.dsq_vtime ordered + * queue. A built-in DSQ is always a FIFO. The built-in local DSQs are used to + * buffer between the scheduler core and the BPF scheduler. See the + * documentation for more details. */ struct scx_dispatch_q { raw_spinlock_t lock; - struct list_head fifo; /* processed in dispatching order */ - struct rb_root_cached priq; /* processed in p->scx.dsq_vtime order */ + struct list_head list; /* tasks in dispatch order */ + struct rb_root priq; /* used to order by p->scx.dsq_vtime */ u32 nr; u64 id; struct rhash_head hash_node; @@ -119,18 +121,20 @@ enum scx_kf_mask { __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; +struct scx_dsq_node { + struct list_head list; /* dispatch order */ + struct rb_node priq; /* p->scx.dsq_vtime order */ + u32 flags; /* SCX_TASK_DSQ_* flags */ +}; + /* * The following is embedded in task_struct and contains all fields necessary * for a task to be scheduled by SCX. */ struct sched_ext_entity { struct scx_dispatch_q *dsq; - struct { - struct list_head fifo; /* dispatch order */ - struct rb_node priq; /* p->scx.dsq_vtime order */ - } dsq_node; + struct scx_dsq_node dsq_node; /* protected by dsq lock */ u32 flags; /* protected by rq lock */ - u32 dsq_flags; /* protected by dsq lock */ u32 weight; s32 sticky_cpu; s32 holding_cpu; diff --git a/init/init_task.c b/init/init_task.c index a32c606cc81ba..5441363f98a7c 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -101,7 +101,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { #endif #ifdef CONFIG_SCHED_CLASS_EXT .scx = { - .dsq_node.fifo = LIST_HEAD_INIT(init_task.scx.dsq_node.fifo), + .dsq_node.list = LIST_HEAD_INIT(init_task.scx.dsq_node.list), .flags = 0, .sticky_cpu = -1, .holding_cpu = -1, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ae7039676d313..19420fa4750e8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4586,7 +4586,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) #ifdef CONFIG_SCHED_CLASS_EXT p->scx.dsq = NULL; - INIT_LIST_HEAD(&p->scx.dsq_node.fifo); + INIT_LIST_HEAD(&p->scx.dsq_node.list); RB_CLEAR_NODE(&p->scx.dsq_node.priq); p->scx.flags = 0; p->scx.weight = 0; diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index fc695b13efb05..46fd51b7a602e 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -929,13 +929,6 @@ static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind, #define scx_ops_error(fmt, args...) 
\ scx_ops_error_kind(SCX_EXIT_ERROR, fmt, ##args) -struct scx_task_iter { - struct sched_ext_entity cursor; - struct task_struct *locked; - struct rq *rq; - struct rq_flags rf; -}; - #define SCX_HAS_OP(op) static_branch_likely(&scx_has_op[SCX_OP_IDX(op)]) static long jiffies_delta_msecs(unsigned long at, unsigned long now) @@ -1085,7 +1078,7 @@ static __always_inline bool scx_kf_allowed(u32 mask) static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask, struct task_struct *p) { - if (!scx_kf_allowed(__SCX_KF_RQ_LOCKED)) + if (!scx_kf_allowed(mask)) return false; if (unlikely((p != current->scx.kf_tasks[0] && @@ -1097,6 +1090,13 @@ static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask, return true; } +struct scx_task_iter { + struct sched_ext_entity cursor; + struct task_struct *locked; + struct rq *rq; + struct rq_flags rf; +}; + /** * scx_task_iter_init - Initialize a task iterator * @iter: iterator to init @@ -1384,8 +1384,8 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, { bool is_local = dsq->id == SCX_DSQ_LOCAL; - WARN_ON_ONCE(p->scx.dsq || !list_empty(&p->scx.dsq_node.fifo)); - WARN_ON_ONCE((p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) || + WARN_ON_ONCE(p->scx.dsq || !list_empty(&p->scx.dsq_node.list)); + WARN_ON_ONCE((p->scx.dsq_node.flags & SCX_TASK_DSQ_ON_PRIQ) || !RB_EMPTY_NODE(&p->scx.dsq_node.priq)); if (!is_local) { @@ -1413,22 +1413,44 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, } if (enq_flags & SCX_ENQ_DSQ_PRIQ) { - p->scx.dsq_flags |= SCX_TASK_DSQ_ON_PRIQ; - rb_add_cached(&p->scx.dsq_node.priq, &dsq->priq, - scx_dsq_priq_less); - /* A DSQ should only be using either FIFO or PRIQ enqueuing. */ - if (unlikely(!list_empty(&dsq->fifo))) + struct rb_node *rbp; + + /* + * A PRIQ DSQ shouldn't be using FIFO enqueueing. As tasks are + * linked to both the rbtree and list on PRIQs, this can only be + * tested easily when adding the first task. + */ + if (unlikely(RB_EMPTY_ROOT(&dsq->priq) && + !list_empty(&dsq->list))) scx_ops_error("DSQ ID 0x%016llx already had FIFO-enqueued tasks", dsq->id); + + p->scx.dsq_node.flags |= SCX_TASK_DSQ_ON_PRIQ; + rb_add(&p->scx.dsq_node.priq, &dsq->priq, scx_dsq_priq_less); + + /* + * Find the previous task and insert after it on the list so + * that @dsq->list is vtime ordered. + */ + rbp = rb_prev(&p->scx.dsq_node.priq); + if (rbp) { + struct task_struct *prev = + container_of(rbp, struct task_struct, + scx.dsq_node.priq); + list_add(&p->scx.dsq_node.list, &prev->scx.dsq_node.list); + } else { + list_add(&p->scx.dsq_node.list, &dsq->list); + } } else { - if (enq_flags & (SCX_ENQ_HEAD | SCX_ENQ_PREEMPT)) - list_add(&p->scx.dsq_node.fifo, &dsq->fifo); - else - list_add_tail(&p->scx.dsq_node.fifo, &dsq->fifo); - /* A DSQ should only be using either FIFO or PRIQ enqueuing. 
*/ - if (unlikely(rb_first_cached(&dsq->priq))) + /* a FIFO DSQ shouldn't be using PRIQ enqueuing */ + if (unlikely(!RB_EMPTY_ROOT(&dsq->priq))) scx_ops_error("DSQ ID 0x%016llx already had PRIQ-enqueued tasks", dsq->id); + + if (enq_flags & (SCX_ENQ_HEAD | SCX_ENQ_PREEMPT)) + list_add(&p->scx.dsq_node.list, &dsq->list); + else + list_add_tail(&p->scx.dsq_node.list, &dsq->list); } dsq->nr++; p->scx.dsq = dsq; @@ -1470,19 +1492,18 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, static void task_unlink_from_dsq(struct task_struct *p, struct scx_dispatch_q *dsq) { - if (p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) { - rb_erase_cached(&p->scx.dsq_node.priq, &dsq->priq); + if (p->scx.dsq_node.flags & SCX_TASK_DSQ_ON_PRIQ) { + rb_erase(&p->scx.dsq_node.priq, &dsq->priq); RB_CLEAR_NODE(&p->scx.dsq_node.priq); - p->scx.dsq_flags &= ~SCX_TASK_DSQ_ON_PRIQ; - } else { - list_del_init(&p->scx.dsq_node.fifo); + p->scx.dsq_node.flags &= ~SCX_TASK_DSQ_ON_PRIQ; } + + list_del_init(&p->scx.dsq_node.list); } static bool task_linked_on_dsq(struct task_struct *p) { - return !list_empty(&p->scx.dsq_node.fifo) || - !RB_EMPTY_NODE(&p->scx.dsq_node.priq); + return !list_empty(&p->scx.dsq_node.list); } static void dispatch_dequeue(struct scx_rq *scx_rq, struct task_struct *p) @@ -1531,6 +1552,11 @@ static void dispatch_dequeue(struct scx_rq *scx_rq, struct task_struct *p) raw_spin_unlock(&dsq->lock); } +static struct scx_dispatch_q *find_user_dsq(u64 dsq_id) +{ + return rhashtable_lookup_fast(&dsq_hash, &dsq_id, dsq_hash_params); +} + static struct scx_dispatch_q *find_non_local_dsq(u64 dsq_id) { lockdep_assert(rcu_read_lock_any_held()); @@ -1538,8 +1564,7 @@ static struct scx_dispatch_q *find_non_local_dsq(u64 dsq_id) if (dsq_id == SCX_DSQ_GLOBAL) return &scx_dsq_global; else - return rhashtable_lookup_fast(&dsq_hash, &dsq_id, - dsq_hash_params); + return find_user_dsq(dsq_id); } static struct scx_dispatch_q *find_dsq_for_dispatch(struct rq *rq, u64 dsq_id, @@ -2024,26 +2049,15 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, { struct scx_rq *scx_rq = &rq->scx; struct task_struct *p; - struct rb_node *rb_node; struct rq *task_rq; bool moved = false; retry: - if (list_empty(&dsq->fifo) && !rb_first_cached(&dsq->priq)) + if (list_empty(&dsq->list)) return false; raw_spin_lock(&dsq->lock); - list_for_each_entry(p, &dsq->fifo, scx.dsq_node.fifo) { - task_rq = task_rq(p); - if (rq == task_rq) - goto this_rq; - if (task_can_run_on_rq(p, rq)) - goto remote_rq; - } - - for (rb_node = rb_first_cached(&dsq->priq); rb_node; - rb_node = rb_next(rb_node)) { - p = container_of(rb_node, struct task_struct, scx.dsq_node.priq); + list_for_each_entry(p, &dsq->list, scx.dsq_node.list) { task_rq = task_rq(p); if (rq == task_rq) goto this_rq; @@ -2058,7 +2072,7 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, /* @dsq is locked and @p is on this rq */ WARN_ON_ONCE(p->scx.holding_cpu >= 0); task_unlink_from_dsq(p, dsq); - list_add_tail(&p->scx.dsq_node.fifo, &scx_rq->local_dsq.fifo); + list_add_tail(&p->scx.dsq_node.list, &scx_rq->local_dsq.list); dsq->nr--; scx_rq->local_dsq.nr++; p->scx.dsq = &scx_rq->local_dsq; @@ -2584,7 +2598,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p) * can find the task unless it wants to trigger a separate * follow-up scheduling event. 
*/ - if (list_empty(&rq->scx.local_dsq.fifo)) + if (list_empty(&rq->scx.local_dsq.list)) do_enqueue_task(rq, p, SCX_ENQ_LAST, -1); else do_enqueue_task(rq, p, 0, -1); @@ -2593,9 +2607,8 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p) static struct task_struct *first_local_task(struct rq *rq) { - WARN_ON_ONCE(rb_first_cached(&rq->scx.local_dsq.priq)); - return list_first_entry_or_null(&rq->scx.local_dsq.fifo, - struct task_struct, scx.dsq_node.fifo); + return list_first_entry_or_null(&rq->scx.local_dsq.list, + struct task_struct, scx.dsq_node.list); } static struct task_struct *pick_next_task_scx(struct rq *rq) @@ -3669,7 +3682,7 @@ static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id) memset(dsq, 0, sizeof(*dsq)); raw_spin_lock_init(&dsq->lock); - INIT_LIST_HEAD(&dsq->fifo); + INIT_LIST_HEAD(&dsq->list); dsq->id = dsq_id; } @@ -3714,7 +3727,7 @@ static void destroy_dsq(u64 dsq_id) rcu_read_lock(); - dsq = rhashtable_lookup_fast(&dsq_hash, &dsq_id, dsq_hash_params); + dsq = find_user_dsq(dsq_id); if (!dsq) goto out_unlock_rcu; @@ -4305,7 +4318,7 @@ static void scx_dump_task(struct seq_buf *s, struct task_struct *p, char marker, seq_buf_printf(s, " scx_state/flags=%u/0x%x dsq_flags=0x%x ops_state/qseq=%lu/%lu\n", scx_get_task_state(p), p->scx.flags & ~SCX_TASK_STATE_MASK, - p->scx.dsq_flags, + p->scx.dsq_node.flags, ops_state & SCX_OPSS_STATE_MASK, ops_state >> SCX_OPSS_QSEQ_SHIFT); seq_buf_printf(s, " sticky/holding_cpu=%d/%d dsq_id=%s\n", @@ -5354,6 +5367,44 @@ static const struct btf_kfunc_id_set scx_kfunc_set_sleepable = { .set = &scx_kfunc_ids_sleepable, }; +__bpf_kfunc_start_defs(); + +/** + * scx_bpf_select_cpu_dfl - The default implementation of ops.select_cpu() + * @p: task_struct to select a CPU for + * @prev_cpu: CPU @p was on previously + * @wake_flags: %SCX_WAKE_* flags + * @is_idle: out parameter indicating whether the returned CPU is idle + * + * Can only be called from ops.select_cpu() if the built-in CPU selection is + * enabled - ops.update_idle() is missing or %SCX_OPS_KEEP_BUILTIN_IDLE is set. + * @p, @prev_cpu and @wake_flags match ops.select_cpu(). + * + * Returns the picked CPU with *@is_idle indicating whether the picked CPU is + * currently idle and thus a good candidate for direct dispatching. + */ +__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, + u64 wake_flags, bool *is_idle) +{ + if (!scx_kf_allowed(SCX_KF_SELECT_CPU)) { + *is_idle = false; + return prev_cpu; + } + + return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle); +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(scx_kfunc_ids_select_cpu) +BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU) +BTF_KFUNCS_END(scx_kfunc_ids_select_cpu) + +static const struct btf_kfunc_id_set scx_kfunc_set_select_cpu = { + .owner = THIS_MODULE, + .set = &scx_kfunc_ids_select_cpu, +}; + static bool scx_dispatch_preamble(struct task_struct *p, u64 enq_flags) { if (!scx_kf_allowed(SCX_KF_ENQUEUE | SCX_KF_DISPATCH)) @@ -5714,113 +5765,36 @@ __bpf_kfunc void scx_bpf_kick_cpu(s32 cpu, u64 flags) * @dsq_id: id of the DSQ * * Return the number of tasks in the DSQ matching @dsq_id. If not found, - * -%ENOENT is returned. Can be called from any non-sleepable online scx_ops - * operations. + * -%ENOENT is returned. 
*/ __bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id) { struct scx_dispatch_q *dsq; + s32 ret; - lockdep_assert(rcu_read_lock_any_held()); + preempt_disable(); if (dsq_id == SCX_DSQ_LOCAL) { - return this_rq()->scx.local_dsq.nr; + ret = this_rq()->scx.local_dsq.nr; + goto out; } else if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) { s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK; - if (ops_cpu_valid(cpu, NULL)) - return cpu_rq(cpu)->scx.local_dsq.nr; + if (ops_cpu_valid(cpu, NULL)) { + ret = cpu_rq(cpu)->scx.local_dsq.nr; + goto out; + } } else { dsq = find_non_local_dsq(dsq_id); - if (dsq) - return dsq->nr; - } - return -ENOENT; -} - -/** - * scx_bpf_test_and_clear_cpu_idle - Test and clear @cpu's idle state - * @cpu: cpu to test and clear idle for - * - * Returns %true if @cpu was idle and its idle state was successfully cleared. - * %false otherwise. - * - * Unavailable if ops.update_idle() is implemented and - * %SCX_OPS_KEEP_BUILTIN_IDLE is not set. - */ -__bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) -{ - if (!static_branch_likely(&scx_builtin_idle_enabled)) { - scx_ops_error("built-in idle tracking is disabled"); - return false; - } - - if (ops_cpu_valid(cpu, NULL)) - return test_and_clear_cpu_idle(cpu); - else - return false; -} - -/** - * scx_bpf_pick_idle_cpu - Pick and claim an idle cpu - * @cpus_allowed: Allowed cpumask - * @flags: %SCX_PICK_IDLE_CPU_* flags - * - * Pick and claim an idle cpu in @cpus_allowed. Returns the picked idle cpu - * number on success. -%EBUSY if no matching cpu was found. - * - * Idle CPU tracking may race against CPU scheduling state transitions. For - * example, this function may return -%EBUSY as CPUs are transitioning into the - * idle state. If the caller then assumes that there will be dispatch events on - * the CPUs as they were all busy, the scheduler may end up stalling with CPUs - * idling while there are pending tasks. Use scx_bpf_pick_any_cpu() and - * scx_bpf_kick_cpu() to guarantee that there will be at least one dispatch - * event in the near future. - * - * Unavailable if ops.update_idle() is implemented and - * %SCX_OPS_KEEP_BUILTIN_IDLE is not set. - */ -__bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed, - u64 flags) -{ - if (!static_branch_likely(&scx_builtin_idle_enabled)) { - scx_ops_error("built-in idle tracking is disabled"); - return -EBUSY; - } - - return scx_pick_idle_cpu(cpus_allowed, flags); -} - -/** - * scx_bpf_pick_any_cpu - Pick and claim an idle cpu if available or pick any CPU - * @cpus_allowed: Allowed cpumask - * @flags: %SCX_PICK_IDLE_CPU_* flags - * - * Pick and claim an idle cpu in @cpus_allowed. If none is available, pick any - * CPU in @cpus_allowed. Guaranteed to succeed and returns the picked idle cpu - * number if @cpus_allowed is not empty. -%EBUSY is returned if @cpus_allowed is - * empty. - * - * If ops.update_idle() is implemented and %SCX_OPS_KEEP_BUILTIN_IDLE is not - * set, this function can't tell which CPUs are idle and will always pick any - * CPU. 
- */ -__bpf_kfunc s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed, - u64 flags) -{ - s32 cpu; - - if (static_branch_likely(&scx_builtin_idle_enabled)) { - cpu = scx_pick_idle_cpu(cpus_allowed, flags); - if (cpu >= 0) - return cpu; + if (dsq) { + ret = dsq->nr; + goto out; + } } - - cpu = cpumask_any_distribute(cpus_allowed); - if (cpu < nr_cpu_ids) - return cpu; - else - return -EBUSY; + ret = -ENOENT; +out: + preempt_enable(); + return ret; } /** @@ -5837,48 +5811,8 @@ __bpf_kfunc void scx_bpf_destroy_dsq(u64 dsq_id) destroy_dsq(dsq_id); } -/** - * scx_bpf_select_cpu_dfl - The default implementation of ops.select_cpu() - * @p: task_struct to select a CPU for - * @prev_cpu: CPU @p was on previously - * @wake_flags: %SCX_WAKE_* flags - * @is_idle: out parameter indicating whether the returned CPU is idle - * - * Can only be called from ops.select_cpu() if the built-in CPU selection is - * enabled - ops.update_idle() is missing or %SCX_OPS_KEEP_BUILTIN_IDLE is set. - * @p, @prev_cpu and @wake_flags match ops.select_cpu(). - * - * Returns the picked CPU with *@is_idle indicating whether the picked CPU is - * currently idle and thus a good candidate for direct dispatching. - */ -__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, - u64 wake_flags, bool *is_idle) -{ - if (!scx_kf_allowed(SCX_KF_SELECT_CPU)) { - *is_idle = false; - return prev_cpu; - } - - return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle); -} - __bpf_kfunc_end_defs(); -BTF_KFUNCS_START(scx_kfunc_ids_ops_only) -BTF_ID_FLAGS(func, scx_bpf_kick_cpu) -BTF_ID_FLAGS(func, scx_bpf_dsq_nr_queued) -BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle) -BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU) -BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU) -BTF_ID_FLAGS(func, scx_bpf_destroy_dsq) -BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU) -BTF_KFUNCS_END(scx_kfunc_ids_ops_only) - -static const struct btf_kfunc_id_set scx_kfunc_set_ops_only = { - .owner = THIS_MODULE, - .set = &scx_kfunc_ids_ops_only, -}; - struct scx_bpf_error_bstr_bufs { u64 data[MAX_BPRINTF_VARARGS]; char msg[SCX_EXIT_MSG_LEN]; @@ -6140,6 +6074,91 @@ __bpf_kfunc void scx_bpf_put_idle_cpumask(const struct cpumask *idle_mask) */ } +/** + * scx_bpf_test_and_clear_cpu_idle - Test and clear @cpu's idle state + * @cpu: cpu to test and clear idle for + * + * Returns %true if @cpu was idle and its idle state was successfully cleared. + * %false otherwise. + * + * Unavailable if ops.update_idle() is implemented and + * %SCX_OPS_KEEP_BUILTIN_IDLE is not set. + */ +__bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) +{ + if (!static_branch_likely(&scx_builtin_idle_enabled)) { + scx_ops_error("built-in idle tracking is disabled"); + return false; + } + + if (ops_cpu_valid(cpu, NULL)) + return test_and_clear_cpu_idle(cpu); + else + return false; +} + +/** + * scx_bpf_pick_idle_cpu - Pick and claim an idle cpu + * @cpus_allowed: Allowed cpumask + * @flags: %SCX_PICK_IDLE_CPU_* flags + * + * Pick and claim an idle cpu in @cpus_allowed. Returns the picked idle cpu + * number on success. -%EBUSY if no matching cpu was found. + * + * Idle CPU tracking may race against CPU scheduling state transitions. For + * example, this function may return -%EBUSY as CPUs are transitioning into the + * idle state. If the caller then assumes that there will be dispatch events on + * the CPUs as they were all busy, the scheduler may end up stalling with CPUs + * idling while there are pending tasks. 
Use scx_bpf_pick_any_cpu() and + * scx_bpf_kick_cpu() to guarantee that there will be at least one dispatch + * event in the near future. + * + * Unavailable if ops.update_idle() is implemented and + * %SCX_OPS_KEEP_BUILTIN_IDLE is not set. + */ +__bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed, + u64 flags) +{ + if (!static_branch_likely(&scx_builtin_idle_enabled)) { + scx_ops_error("built-in idle tracking is disabled"); + return -EBUSY; + } + + return scx_pick_idle_cpu(cpus_allowed, flags); +} + +/** + * scx_bpf_pick_any_cpu - Pick and claim an idle cpu if available or pick any CPU + * @cpus_allowed: Allowed cpumask + * @flags: %SCX_PICK_IDLE_CPU_* flags + * + * Pick and claim an idle cpu in @cpus_allowed. If none is available, pick any + * CPU in @cpus_allowed. Guaranteed to succeed and returns the picked idle cpu + * number if @cpus_allowed is not empty. -%EBUSY is returned if @cpus_allowed is + * empty. + * + * If ops.update_idle() is implemented and %SCX_OPS_KEEP_BUILTIN_IDLE is not + * set, this function can't tell which CPUs are idle and will always pick any + * CPU. + */ +__bpf_kfunc s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed, + u64 flags) +{ + s32 cpu; + + if (static_branch_likely(&scx_builtin_idle_enabled)) { + cpu = scx_pick_idle_cpu(cpus_allowed, flags); + if (cpu >= 0) + return cpu; + } + + cpu = cpumask_any_distribute(cpus_allowed); + if (cpu < nr_cpu_ids) + return cpu; + else + return -EBUSY; +} + /** * scx_bpf_task_running - Is task currently running? * @p: task of interest @@ -6196,6 +6215,9 @@ __bpf_kfunc struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __bpf_kfunc_end_defs(); BTF_KFUNCS_START(scx_kfunc_ids_any) +BTF_ID_FLAGS(func, scx_bpf_kick_cpu) +BTF_ID_FLAGS(func, scx_bpf_dsq_nr_queued) +BTF_ID_FLAGS(func, scx_bpf_destroy_dsq) BTF_ID_FLAGS(func, scx_bpf_exit_bstr, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, scx_bpf_error_bstr, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, scx_bpf_nr_cpu_ids) @@ -6208,6 +6230,9 @@ BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE) BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE) BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE) BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE) +BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle) +BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU) +BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU) #ifdef CONFIG_CGROUP_SCHED @@ -6237,14 +6262,14 @@ static int __init scx_init(void) */ if ((ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_sleepable)) || + (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, + &scx_kfunc_set_select_cpu)) || (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_enqueue_dispatch)) || (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_dispatch)) || (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_cpu_release)) || - (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, - &scx_kfunc_set_ops_only)) || (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_any)) || (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
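
A minimal usage sketch (not part of the patch): with scx_bpf_select_cpu_dfl() now registered through its own scx_kfunc_set_select_cpu set instead of the removed scx_kfunc_set_ops_only set, a BPF scheduler's ops.select_cpu() callback can defer to it and direct-dispatch when an idle CPU is found, as described in the kfunc's doc comment above. The hypothetical example_select_cpu name, the common.bpf.h header, the BPF_STRUCT_OPS() helper, and the scx_bpf_dispatch()/SCX_SLICE_DFL calls below are assumptions borrowed from the in-tree example schedulers and do not appear in this diff.

#include <scx/common.bpf.h>	/* assumed helper header, as used by the example schedulers */

s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	bool is_idle = false;
	s32 cpu;

	/*
	 * Falls back to returning @prev_cpu with *is_idle == false when the
	 * call is not allowed from this context (see the
	 * scx_kf_allowed(SCX_KF_SELECT_CPU) check in the kfunc body).
	 */
	cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
	if (is_idle)
		/* picked CPU is idle - dispatch straight to its local DSQ */
		scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);

	return cpu;
}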