From 25a5d102d28a2cc389ec5a9cb50873f7844f2edf Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 4 Dec 2023 15:35:36 -0600 Subject: [PATCH 1/2] scx: Disable vtime ordering for internal DSQs Internal DSQs, i.e. SCX_DSQ_LOCAL and SCX_DSQ_GLOBAL, have somewhat special behavior in that they're automatically consumed by the internal ext.c logic. A user could therefore accidentally starve tasks on either of the DSQs if they dispatch to both the vtime and FIFO queues, as they're consumed in a specific order by the internal logic. It likely doesn't make sense to ever use both FIFO and PRIQ ordering in the same DSQ, so let's explicitly disable it for the internal DSQs. In a follow-on change, we'll error out a scheduler if a user dispatches to both FIFO and vtime for any DSQ. Reported-by: Changwoo Min Signed-off-by: David Vernet --- Documentation/scheduler/sched-ext.rst | 8 +++++--- kernel/sched/ext.c | 23 ++++++++++++++--------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst index 25ddb535c29723..b67346cf523549 100644 --- a/Documentation/scheduler/sched-ext.rst +++ b/Documentation/scheduler/sched-ext.rst @@ -194,9 +194,11 @@ a task is never queued on the BPF scheduler and both the local and global DSQs are consumed automatically. ``scx_bpf_dispatch()`` queues the task on the FIFO of the target DSQ. Use -``scx_bpf_dispatch_vtime()`` for the priority queue. See the function -documentation and usage in ``tools/sched_ext/scx_simple.bpf.c`` for more -information. +``scx_bpf_dispatch_vtime()`` for the priority queue. Internal DSQs such as +``SCX_DSQ_LOCAL`` and ``SCX_DSQ_GLOBAL`` do not support priority-queue +dispatching, and must be dispatched to with ``scx_bpf_dispatch()``. See the +function documentation and usage in ``tools/sched_ext/scx_simple.bpf.c`` for +more information. Where to Look ============= diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index ebe295eb78de42..c4fea4f7ff0594 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -647,6 +647,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, } if (enq_flags & SCX_ENQ_DSQ_PRIQ) { + WARN_ON_ONCE(dsq->id & SCX_DSQ_FLAG_BUILTIN); p->scx.dsq_flags |= SCX_TASK_DSQ_ON_PRIQ; rb_add_cached(&p->scx.dsq_node.priq, &dsq->priq, scx_dsq_priq_less); @@ -1772,16 +1773,9 @@ static struct task_struct *first_local_task(struct rq *rq) { struct rb_node *rb_node; - if (!list_empty(&rq->scx.local_dsq.fifo)) - return list_first_entry(&rq->scx.local_dsq.fifo, + WARN_ON_ONCE(rb_first_cached(&rq->scx.local_dsq.priq)); + return list_first_entry_or_null(&rq->scx.local_dsq.fifo, struct task_struct, scx.dsq_node.fifo); - - rb_node = rb_first_cached(&rq->scx.local_dsq.priq); - if (rb_node) - return container_of(rb_node, - struct task_struct, scx.dsq_node.priq); - - return NULL; } static struct task_struct *pick_next_task_scx(struct rq *rq) @@ -3948,6 +3942,17 @@ void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, if (!scx_dispatch_preamble(p, enq_flags)) return; + /* + * SCX_DSQ_LOCAL and SCX_DSQ_GLOBAL DSQs always consume from their FIFO + * queues. To avoid confusion and accidentally starving + * vtime-dispatched tasks by FIFO-dispatched tasks, we disallow any + * internal DSQ from doing vtime ordering of tasks. + */ + if (dsq_id & SCX_DSQ_FLAG_BUILTIN) { + scx_ops_error("Cannot use vtime ordering for built-in DSQs"); + return; + } + if (slice) p->scx.slice = slice; else From 346fd9d1176f52ecee6739787c6c7c1869c1263d Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 4 Dec 2023 15:53:19 -0600 Subject: [PATCH 2/2] scx: Enforce either/or usage of DSQ FIFO/PRIQ dispatching Currently, a user can do both FIFO and PRIQ dispatching to a DSQ. This can result in non-intuitive behavior. For example, if a user PRIQ-dispatches to a DSQ, and then subsequently FIFO dispatches, an scx_bpf_consume() operation will always favor the FIFO-dispatched task. While we could add something like an scx_bpf_consume_vtime() kfunc, given that there's not a clear use-case for doing both types of dispatching in a single DSQ, for now we'll elect to just enforce that only a single type is being used at any given time. Reported-by: Changwoo Min Signed-off-by: David Vernet --- kernel/sched/ext.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index c4fea4f7ff0594..1095d494cdf246 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -651,11 +651,19 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, p->scx.dsq_flags |= SCX_TASK_DSQ_ON_PRIQ; rb_add_cached(&p->scx.dsq_node.priq, &dsq->priq, scx_dsq_priq_less); + /* A DSQ should only be using either FIFO or PRIQ enqueuing. */ + if (unlikely(!list_empty(&dsq->fifo))) + scx_ops_error("DSQ ID 0x%016llx already had FIFO-enqueued tasks", + dsq->id); } else { if (enq_flags & (SCX_ENQ_HEAD | SCX_ENQ_PREEMPT)) list_add(&p->scx.dsq_node.fifo, &dsq->fifo); else list_add_tail(&p->scx.dsq_node.fifo, &dsq->fifo); + /* A DSQ should only be using either FIFO or PRIQ enqueuing. */ + if (unlikely(rb_first_cached(&dsq->priq))) + scx_ops_error("DSQ ID 0x%016llx already had PRIQ-enqueued tasks", + dsq->id); } dsq->nr++; p->scx.dsq = dsq;