diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 903c9ee1db4529..6130ab170e9331 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2312,7 +2312,13 @@ static void wake_csd_func(void *info)
 	sched_ttwu_pending();
 }
 
-static void __ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
+/*
+ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if
+ * necessary. The wakee CPU on receipt of the IPI will queue the task
+ * via sched_ttwu_wakeup() for activation so the wakee incurs the cost
+ * of the wakeup instead of the waker.
+ */
+static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
 {
 	struct rq *rq = cpu_rq(cpu);
 
@@ -2355,11 +2361,32 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
 	return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
 }
 
-static bool ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
+static inline bool ttwu_queue_cond(int cpu, int wake_flags)
+{
+	/*
+	 * If the CPU does not share cache, then queue the task on the
+	 * remote rqs wakelist to avoid accessing remote data.
+	 */
+	if (!cpus_share_cache(smp_processor_id(), cpu))
+		return true;
+
+	/*
+	 * If the task is descheduling and the only running task on the
+	 * CPU then use the wakelist to offload the task activation to
+	 * the soon-to-be-idle CPU as the current CPU is likely busy.
+	 * nr_running is checked to avoid unnecessary task stacking.
+	 */
+	if ((wake_flags & WF_ON_RQ) && cpu_rq(cpu)->nr_running <= 1)
+		return true;
+
+	return false;
+}
+
+static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
 {
-	if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
+	if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
 		sched_clock_cpu(cpu); /* Sync clocks across CPUs */
-		__ttwu_queue_remote(p, cpu, wake_flags);
+		__ttwu_queue_wakelist(p, cpu, wake_flags);
 		return true;
 	}
 
@@ -2373,7 +2400,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
 	struct rq_flags rf;
 
 #if defined(CONFIG_SMP)
-	if (ttwu_queue_remote(p, cpu, wake_flags))
+	if (ttwu_queue_wakelist(p, cpu, wake_flags))
 		return;
 #endif
 
@@ -2593,7 +2620,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 * let the waker make forward progress. This is safe because IRQs are
 	 * disabled and the IPI will deliver after on_cpu is cleared.
 	 */
-	if (READ_ONCE(p->on_cpu) && ttwu_queue_remote(p, cpu, wake_flags))
+	if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ))
 		goto unlock;
 
 	/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f7ab6334e9922b..4b32cff0dcbe9b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1685,7 +1685,8 @@ static inline int task_on_rq_migrating(struct task_struct *p)
  */
 #define WF_SYNC			0x01		/* Waker goes to sleep after wakeup */
 #define WF_FORK			0x02		/* Child wakeup after fork */
-#define WF_MIGRATED		0x4		/* Internal use, task got migrated */
+#define WF_MIGRATED		0x04		/* Internal use, task got migrated */
+#define WF_ON_RQ		0x08		/* Wakee is on_rq */
 
 /*
  * To aid in avoiding the subversion of "niceness" due to uneven distribution
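
For reference, below is a minimal user-space sketch of the queueing decision the patch introduces. It is not kernel code: the fake_rq struct, the rqs[] array, the llc_id field, the explicit this_cpu argument, and main() are hypothetical stand-ins invented for illustration; only the flag values and the overall shape of ttwu_queue_cond() follow the hunks above (the real function reads per-CPU runqueue state and uses smp_processor_id() for the waker).

/* Standalone sketch (assumptions noted above); compiles with any C99 compiler. */
#include <stdbool.h>
#include <stdio.h>

/* Wake flags, matching the values added in kernel/sched/sched.h. */
#define WF_SYNC		0x01
#define WF_FORK		0x02
#define WF_MIGRATED	0x04
#define WF_ON_RQ	0x08

/* Hypothetical stand-in for the per-CPU runqueue state the patch consults. */
struct fake_rq {
	int llc_id;	/* last-level-cache domain of the CPU */
	int nr_running;	/* tasks currently runnable on the CPU */
};

static struct fake_rq rqs[4] = {
	{ .llc_id = 0, .nr_running = 3 },	/* CPU 0: the waker, busy */
	{ .llc_id = 0, .nr_running = 1 },	/* CPU 1: shares cache, descheduling wakee */
	{ .llc_id = 1, .nr_running = 0 },	/* CPU 2: remote cache, idle */
	{ .llc_id = 1, .nr_running = 2 },	/* CPU 3: remote cache, busy */
};

static bool cpus_share_cache(int this_cpu, int that_cpu)
{
	return rqs[this_cpu].llc_id == rqs[that_cpu].llc_id;
}

/*
 * Mirrors the new ttwu_queue_cond(): should the wakeup be queued on the
 * target CPU's wake_list (and an IPI sent) rather than activated locally?
 */
static bool ttwu_queue_cond(int this_cpu, int cpu, int wake_flags)
{
	/* Cross-LLC wakeup: avoid touching the remote runqueue's data. */
	if (!cpus_share_cache(this_cpu, cpu))
		return true;

	/*
	 * Wakee is still descheduling and its CPU is about to go idle:
	 * let that CPU pay for the activation instead of the busy waker.
	 */
	if ((wake_flags & WF_ON_RQ) && rqs[cpu].nr_running <= 1)
		return true;

	return false;
}

int main(void)
{
	int waker = 0;

	printf("CPU1, WF_ON_RQ : %d\n", ttwu_queue_cond(waker, 1, WF_ON_RQ));	/* 1: offload */
	printf("CPU1, no flags : %d\n", ttwu_queue_cond(waker, 1, 0));		/* 0: activate locally */
	printf("CPU2, no flags : %d\n", ttwu_queue_cond(waker, 2, 0));		/* 1: cross-LLC */
	return 0;
}

The sketch makes the two-part condition explicit: before the patch only the cross-cache case used the wake_list, while the WF_ON_RQ branch is the new case where the wakee's CPU is expected to go idle imminently.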