From f5a8234bd5c1e9e3626257e9250bc5b6dac34454 Mon Sep 17 00:00:00 2001
From: John Attinella
Date: Fri, 8 May 2020 14:58:15 -0500
Subject: [PATCH 1/2] lwksched: fix CPU assignment during overcommit (#2)

When CPUs are overcommitted in an LWK partition, the additional threads
were being assigned to the CPU of the thread that created the new
threads. For example, if 10 CPUs were in the reservation for the
process and 100 threads were created by the main thread of the process,
there would be 1 thread running on each of the upper 9 CPUs and 91
threads all trying to run on the first CPU of the reservation. This mOS
problem was introduced during a recent rebase in which Linux changed
the clone system call code flow.

Change-Id: I068dd29ebdb8dc195798a68ef268f5719247e59d
Signed-off-by: John Attinella
---
 kernel/sched/core.c |  3 +--
 kernel/sched/mos.c  | 12 ++++++++----
 kernel/sched/mos.h  | 11 +++++++++++
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 79fe819cd03dd..24818f3efc772 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2798,8 +2798,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->rt.time_slice = sched_rr_timeslice;
 	p->rt.on_rq = 0;
 	p->rt.on_list = 0;
-	init_run_list_mos(p);
-	init_util_list_mos(p);
+	init_fork_mos(p);
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
diff --git a/kernel/sched/mos.c b/kernel/sched/mos.c
index 309ec3909d4ad..f51364620c25a 100644
--- a/kernel/sched/mos.c
+++ b/kernel/sched/mos.c
@@ -2991,8 +2991,13 @@ static void task_fork_mos(struct task_struct *p)
 
 	p->prio = current->prio;
 	p->normal_prio = current->prio;
-	p->mos.thread_type = mos_thread_type_normal;
-	p->mos.cpu_home = -1;
+
+	/* We need to remove the commit placed on the CPU that called clone.
+	 * This occurred in core.c->sched_fork->__select_task_cpu. This needs
+	 * to be done prior to selecting the target CPU for this new thread
+	 * which occurs in core.c->wake_up_new_task.
+	 */
+	uncommit_cpu(p);
 
 	/*
 	 * We need to set the cpus allowed mask appropriately. If this is
@@ -3054,8 +3059,7 @@
 
 void mos_set_task_cpu(struct task_struct *p, int new_cpu)
 {
-	if (task_cpu(p) != new_cpu &&
-	    cpu_rq(new_cpu)->lwkcpu &&
+	if (cpu_rq(new_cpu)->lwkcpu &&
 	    p->mos_process &&
 	    new_cpu != p->mos.cpu_home) {
 		/* Release a previous commit if it exists */
diff --git a/kernel/sched/mos.h b/kernel/sched/mos.h
index a8ee3baad6bf0..0d991dfba2dca 100644
--- a/kernel/sched/mos.h
+++ b/kernel/sched/mos.h
@@ -143,6 +143,14 @@ static inline bool is_migration_mask_valid_mos(const cpumask_t *mask,
 	return false;
 }
 
+static inline void init_fork_mos(struct task_struct *p)
+{
+	p->mos.cpu_home = -1;
+	p->mos.thread_type = mos_thread_type_normal;
+	init_run_list_mos(p);
+	init_util_list_mos(p);
+}
+
 #else
 
 static inline void assimilate_mos(struct rq *rq, struct task_struct *p)
@@ -236,6 +244,9 @@ static inline bool is_migration_mask_valid_mos(const cpumask_t *mask,
 	return false;
 }
 
+static inline void init_fork_mos(struct task_struct *p)
+{}
+
 #endif
 
 #ifdef CONFIG_MOS_MOVE_SYSCALLS
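
Illustration (not part of the patch): a minimal userspace model of the
commit/uncommit bookkeeping this change relies on. The per-CPU counter
and the cpu_home field mirror mos_rq->compute_commits and
p->mos.cpu_home from kernel/sched/mos.c, but everything else here is
simplified and the locking is omitted. It shows the ordering the patch
restores: the provisional commit that sched_fork() places on the clone
caller's CPU is released by uncommit_cpu() before wake_up_new_task()
commits the thread to its real target CPU, so the caller's CPU is not
left holding a stale commit for every new thread.

#include <stdio.h>

#define NCPUS 10

static int compute_commits[NCPUS];	/* stand-in for mos_rq->compute_commits */

struct task {
	int cpu_home;			/* -1 means "not committed to any CPU" */
};

static void commit_cpu(struct task *t, int cpu)
{
	compute_commits[cpu]++;
	t->cpu_home = cpu;
}

static void uncommit_cpu(struct task *t)
{
	if (t->cpu_home < 0)
		return;
	if (compute_commits[t->cpu_home] > 0)	/* underflow guard, as in mos.c */
		compute_commits[t->cpu_home]--;
	t->cpu_home = -1;
}

int main(void)
{
	struct task child = { .cpu_home = -1 };
	int caller_cpu = 0;			/* CPU of the thread calling clone() */
	int target_cpu = 7;			/* CPU later chosen for the new thread */

	commit_cpu(&child, caller_cpu);		/* provisional commit from sched_fork() */
	uncommit_cpu(&child);			/* released in task_fork_mos() by this patch */
	commit_cpu(&child, target_cpu);		/* final commit from wake_up_new_task() */

	printf("cpu%d commits=%d, cpu%d commits=%d, cpu_home=%d\n",
	       caller_cpu, compute_commits[caller_cpu],
	       target_cpu, compute_commits[target_cpu], child.cpu_home);
	return 0;
}
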
From 2ca9a02ed69db9e4840bf98591484c22d6b967c1 Mon Sep 17 00:00:00 2001
From: John Attinella
Date: Tue, 19 May 2020 12:42:34 +0000
Subject: [PATCH 2/2] lwksched: Fix deadlock during thread exit

The mOS scheduler has a spin lock in its CPU-scoped run-queue object.
This spin lock is obtained when we are committing and un-committing a
thread to a specific CPU. All calls that take this spin lock must occur
with interrupts disabled. However, in the thread exit path interrupts
are enabled, and in this path we are un-committing the exiting thread.
While we held the spin lock, a scheduler timer tick fired. It fired
because we have over-committed threads on this CPU (we disable the
timer tick if we are not over-committed). The timer tick processing
drove us through the mOS code to wake up and dispatch another thread.
That flow attempts to obtain the same spin lock to commit the thread to
this run queue, resulting in a deadlock.

The fix is to use the more robust irqsave/irqrestore spin-lock
interface for lock and unlock, which guarantees that interrupts are
always disabled while the spin lock is held.

Change-Id: I6b296abc4a78c2b143973a27ae1e0e12b996904f
Signed-off-by: John Attinella
---
 kernel/sched/mos.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/mos.c b/kernel/sched/mos.c
index f51364620c25a..85976721650ed 100644
--- a/kernel/sched/mos.c
+++ b/kernel/sched/mos.c
@@ -345,13 +345,14 @@ static void uncommit_cpu(struct task_struct *p)
 	struct mos_rq *mos_rq;
 	int cpu = p->mos.cpu_home;
 	int underflow = 0;
+	unsigned long flags;
 
 	if (cpu < 0)
 		return;
 
 	mos_rq = &cpu_rq(cpu)->mos;
 	p->mos.cpu_home = -1;
-	raw_spin_lock(&mos_rq->lock);
+	raw_spin_lock_irqsave(&mos_rq->lock, flags);
 	if (p->mos.thread_type == mos_thread_type_normal) {
 		if (mos_rq->compute_commits > 0)
 			mos_rq->compute_commits--;
@@ -364,7 +365,7 @@
 		else
 			underflow = 1;
 	}
-	raw_spin_unlock(&mos_rq->lock);
+	raw_spin_unlock_irqrestore(&mos_rq->lock, flags);
 
 	trace_mos_cpu_uncommit(p, cpu, mos_rq->compute_commits,
 			       mos_rq->utility_commits, underflow);
@@ -375,11 +376,12 @@ static void commit_cpu(struct task_struct *p, int cpu)
 	struct mos_rq *mos_rq;
 	unsigned int newval = 0;
 	int overflow = 0;
+	unsigned long flags;
 
 	if (cpu < 0)
 		return;
 	mos_rq = &cpu_rq(cpu)->mos;
-	raw_spin_lock(&mos_rq->lock);
+	raw_spin_lock_irqsave(&mos_rq->lock, flags);
 	if (p->mos.thread_type == mos_thread_type_normal) {
 		if (mos_rq->compute_commits < INT_MAX) {
 			newval = ++mos_rq->compute_commits;
@@ -395,7 +397,7 @@
 		} else
 			overflow = 1;
 	}
-	raw_spin_unlock(&mos_rq->lock);
+	raw_spin_unlock_irqrestore(&mos_rq->lock, flags);
 	p->mos.cpu_home = cpu;
 	trace_mos_cpu_commit(p, cpu, mos_rq->compute_commits,
 			     mos_rq->utility_commits, overflow);
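
Illustration (not part of the patch): a kernel-style sketch of the
locking rule the fix enforces. example_lock, tick_path() and
exit_path() are made-up names standing in for mos_rq->lock, the
scheduler-tick processing, and the thread-exit un-commit path; only the
raw_spin_lock_irqsave()/raw_spin_unlock_irqrestore() pattern itself is
taken from the patch.

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);

/* Runs from the scheduler-tick (hard IRQ) path on an over-committed CPU;
 * interrupts are already disabled here.
 */
static void tick_path(void)
{
	raw_spin_lock(&example_lock);
	/* ... pick and commit the next thread to this run queue ... */
	raw_spin_unlock(&example_lock);
}

/* Runs from process context (e.g. thread exit), where interrupts may
 * still be enabled.
 */
static void exit_path(void)
{
	unsigned long flags;

	/* A plain raw_spin_lock() here can deadlock: if the tick fires on
	 * this CPU while the lock is held, tick_path() spins forever on a
	 * lock its own CPU already owns.  Saving and disabling interrupts
	 * for the critical section closes that window.
	 */
	raw_spin_lock_irqsave(&example_lock, flags);
	/* ... release the exiting thread's commit ... */
	raw_spin_unlock_irqrestore(&example_lock, flags);
}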