sched/core: Rework the __schedule() preempt argument
PREEMPT_RT needs to hand a special state into __schedule() when a task
blocks on a 'sleeping' spin/rwlock. This is required to handle
rcu_note_context_switch() correctly without special casing in the RCU
code. From an RCU point of view, blocking on the sleeping spinlock is
equivalent to preemption, because the task might be in a read-side
critical section.

schedule_debug() also has a check which would trigger in the !preempt
case, but that could be handled differently.
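
As context for the conversion below: the rewritten call sites keep the
boolean contract of schedule_debug() and rcu_note_context_switch() by
collapsing the new mode with !!. A trivial standalone check of that
collapse (userspace sketch, not kernel code):

    #include <assert.h>

    #define SM_NONE	0x0
    #define SM_PREEMPT	0x1

    int main(void)
    {
        /* !! collapses any non-zero mode to 1, so callees that still
         * take a bool see exactly what the old 'preempt' argument
         * would have carried. */
        assert(!!SM_NONE == 0);
        assert(!!SM_PREEMPT == 1);
        return 0;
    }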

To avoid adding another argument and extra checks which cannot be optimized
out by the compiler, the following solution has been chosen:

 - Replace the boolean 'preempt' argument with an unsigned integer
   'sched_mode' argument and define constants to hand in:
   (0 == no preemption, 1 == preemption).

 - Add two masks to apply on that mode: one for the debug/rcu invocations,
   and one for the actual scheduling decision.

   For a non-RT kernel these masks are UINT_MAX, i.e. all bits are set,
   which allows the compiler to optimize the AND operation out because it
   does not mask out anything. IOW, it is no different from the boolean.

   RT enabled kernels will define these masks separately.

No functional change.
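
To make the optimization claim concrete, here is a minimal userspace
sketch (names mirror the patch; this is not the kernel code itself)
showing why the !RT mask lets the compiler reduce the masked test back
to the old boolean check:

    #include <stdio.h>

    /* Mirrors the patch: mode constants plus a mask applied at the
     * scheduling decision. */
    #define SM_NONE		0x0
    #define SM_PREEMPT		0x1
    /* !RT: all bits set, so (mode & mask) == mode and the AND folds away. */
    #define SM_MASK_PREEMPT	(~0U)

    static int is_voluntary_switch(unsigned int sched_mode)
    {
        /* With SM_MASK_PREEMPT == ~0U the compiler rewrites this to
         * "!sched_mode" -- exactly the old "!preempt" test. */
        return !(sched_mode & SM_MASK_PREEMPT);
    }

    int main(void)
    {
        printf("SM_NONE:    voluntary=%d\n", is_voluntary_switch(SM_NONE));
        printf("SM_PREEMPT: voluntary=%d\n", is_voluntary_switch(SM_PREEMPT));
        return 0;
    }

An RT kernel can instead define the mask to exclude a new mode bit, so
that blocking on a sleeping lock looks like a preemption to the RCU and
debug invocations, but like a voluntary switch at the decision point.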

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210815211302.315473019@linutronix.de
KAGA-KOKO authored and Ingo Molnar committed Aug 17, 2021
1 parent 5f220be commit b4bfa3f
1 changed file: kernel/sched/core.c (23 additions, 11 deletions)
@@ -5819,6 +5819,18 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
 #endif /* CONFIG_SCHED_CORE */
 
+/*
+ * Constants for the sched_mode argument of __schedule().
+ *
+ * The mode argument allows RT enabled kernels to differentiate a
+ * preemption from blocking on a 'sleeping' spin/rwlock. Note that
+ * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to
+ * optimize the AND operation out and just check for zero.
+ */
+#define SM_NONE			0x0
+#define SM_PREEMPT		0x1
+#define SM_MASK_PREEMPT	(~0U)
+
 /*
  * __schedule() is the main scheduler function.
  *
@@ -5858,7 +5870,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
  *
  * WARNING: must be called with preemption disabled!
  */
-static void __sched notrace __schedule(bool preempt)
+static void __sched notrace __schedule(unsigned int sched_mode)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
@@ -5871,13 +5883,13 @@ static void __sched notrace __schedule(bool preempt)
 	rq = cpu_rq(cpu);
 	prev = rq->curr;
 
-	schedule_debug(prev, preempt);
+	schedule_debug(prev, !!sched_mode);
 
 	if (sched_feat(HRTICK) || sched_feat(HRTICK_DL))
 		hrtick_clear(rq);
 
 	local_irq_disable();
-	rcu_note_context_switch(preempt);
+	rcu_note_context_switch(!!sched_mode);
 
 	/*
 	 * Make sure that signal_pending_state()->signal_pending() below
@@ -5911,7 +5923,7 @@ static void __sched notrace __schedule(bool preempt)
 	 * - ptrace_{,un}freeze_traced() can change ->state underneath us.
 	 */
 	prev_state = READ_ONCE(prev->__state);
-	if (!preempt && prev_state) {
+	if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) {
 		if (signal_pending_state(prev_state, prev)) {
 			WRITE_ONCE(prev->__state, TASK_RUNNING);
 		} else {
@@ -5977,7 +5989,7 @@ static void __sched notrace __schedule(bool preempt)
 		migrate_disable_switch(rq, prev);
 		psi_sched_switch(prev, next, !task_on_rq_queued(prev));
 
-		trace_sched_switch(preempt, prev, next);
+		trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next);
 
 		/* Also unlocks the rq: */
 		rq = context_switch(rq, prev, next, &rf);
Expand All @@ -5998,7 +6010,7 @@ void __noreturn do_task_dead(void)
/* Tell freezer to ignore us: */
current->flags |= PF_NOFREEZE;

__schedule(false);
__schedule(SM_NONE);
BUG();

/* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */
@@ -6059,7 +6071,7 @@ asmlinkage __visible void __sched schedule(void)
 	sched_submit_work(tsk);
 	do {
 		preempt_disable();
-		__schedule(false);
+		__schedule(SM_NONE);
 		sched_preempt_enable_no_resched();
 	} while (need_resched());
 	sched_update_worker(tsk);
@@ -6087,7 +6099,7 @@ void __sched schedule_idle(void)
 	 */
 	WARN_ON_ONCE(current->__state);
 	do {
-		__schedule(false);
+		__schedule(SM_NONE);
 	} while (need_resched());
 }
 
@@ -6140,7 +6152,7 @@ static void __sched notrace preempt_schedule_common(void)
 		 */
 		preempt_disable_notrace();
 		preempt_latency_start(1);
-		__schedule(true);
+		__schedule(SM_PREEMPT);
 		preempt_latency_stop(1);
 		preempt_enable_no_resched_notrace();
 
@@ -6219,7 +6231,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 		 * an infinite recursion.
 		 */
 		prev_ctx = exception_enter();
-		__schedule(true);
+		__schedule(SM_PREEMPT);
 		exception_exit(prev_ctx);
 
 		preempt_latency_stop(1);
@@ -6368,7 +6380,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 	do {
 		preempt_disable();
 		local_irq_enable();
-		__schedule(true);
+		__schedule(SM_PREEMPT);
 		local_irq_disable();
 		sched_preempt_enable_no_resched();
 	} while (need_resched());
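
For completeness: the commit message's note that RT enabled kernels will
define these masks separately plays out in the later PREEMPT_RT lock
substitution work roughly as below. This is a sketch of that follow-up,
not part of this commit; SM_RTLOCK_WAIT and the #ifdef split are taken
from the later series and may differ in detail:

    #define SM_NONE		0x0
    #define SM_PREEMPT		0x1
    #define SM_RTLOCK_WAIT	0x2

    #ifndef CONFIG_PREEMPT_RT
    # define SM_MASK_PREEMPT	(~0U)
    #else
    /* RT: blocking on a 'sleeping' lock hands in SM_RTLOCK_WAIT. It must
     * look like a preemption to RCU/debug (non-zero) but must not take
     * the "voluntary switch" path, so the mask keeps only SM_PREEMPT. */
    # define SM_MASK_PREEMPT	SM_PREEMPT
    #endif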
