From 3f293a079880d8fd0b84dc867d395818db3846a9 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Sun, 8 Oct 2017 17:02:23 +0800 Subject: [PATCH] workqueue: Fix irq inversion deadlock in manage_workers() Josef reported a HARDIRQ-safe -> HARDIRQ-unsafe lock order detected by lockdep: | [ 1270.472259] WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected | [ 1270.472783] 4.14.0-rc1-xfstests-12888-g76833e8 #110 Not tainted | [ 1270.473240] ----------------------------------------------------- | [ 1270.473710] kworker/u5:2/5157 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: | [ 1270.474239] (&(&lock->wait_lock)->rlock){+.+.}, at: [] __mutex_unlock_slowpath+0xa2/0x280 | [ 1270.474994] | [ 1270.474994] and this task is already holding: | [ 1270.475440] (&pool->lock/1){-.-.}, at: [] worker_thread+0x366/0x3c0 | [ 1270.476046] which would create a new lock dependency: | [ 1270.476436] (&pool->lock/1){-.-.} -> (&(&lock->wait_lock)->rlock){+.+.} | [ 1270.476949] | [ 1270.476949] but this new dependency connects a HARDIRQ-irq-safe lock: | [ 1270.477553] (&pool->lock/1){-.-.} ... | [ 1270.488900] to a HARDIRQ-irq-unsafe lock: | [ 1270.489327] (&(&lock->wait_lock)->rlock){+.+.} ... | [ 1270.494735] Possible interrupt unsafe locking scenario: | [ 1270.494735] | [ 1270.495250] CPU0 CPU1 | [ 1270.495600] ---- ---- | [ 1270.495947] lock(&(&lock->wait_lock)->rlock); | [ 1270.496295] local_irq_disable(); | [ 1270.496753] lock(&pool->lock/1); | [ 1270.497205] lock(&(&lock->wait_lock)->rlock); | [ 1270.497744] | [ 1270.497948] lock(&pool->lock/1); , which will cause a irq inversion deadlock if the above lock scenario happens. The root cause of this safe -> unsafe lock order is the mutex_unlock(pool::manager_arb) in manage_workers() with pool::lock held. An obvious fix is dropping the pool::lock before mutex_unlock() and re-grabing afterwards, which however will introduce a race condition between worker_thread() and put_unbound_pool(): put_unbound_pool() will grab both pool::manager_arb and pool::lock to set all current IDLE workers to DIE, and may wait on the pool::detach_completion for the last worker to detach from the pool. And when manage_workers() is called, the caller worker_thread is in non-ILDE state, so if the worker dropped both pool::{manager_arb, lock} and got delayed for a while long enough for a put_unbound_pool(), the put_unbound_pool() would not switch that worker to DIE. As a result, the worker will not detach from the pool as it's not DIE and the put_unbound_pool() will not proceed as it's waiting for the last worker to detach, therefore deadlock. To overcome this, put the worker back to IDLE state before it drops pool::lock in manage_workers(), and make the worker check again whether it's DIE after it re-grabs the pool::lock. In this way, we fix the potential deadlock reported by lockdep without introducing another. Reported-by: Josef Bacik Signed-off-by: Boqun Feng Cc: Peter Zijlstra --- kernel/workqueue.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 64d0edf428f850..2ea7b04cc48b51 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1997,7 +1997,40 @@ static bool manage_workers(struct worker *worker) maybe_create_worker(pool); pool->manager = NULL; + + /* + * Put the manager back to ->idle_list, this allows us to drop the + * pool->lock safely without racing with put_unbound_pool() + * + * + * worker_thread(): + * spin_lock_irq(&pool->lock); + * worker_leave_idle(); + * manage_workers(): // return true + * mutex_trylock(&pool->manager_arb); + * + * spin_unlock_irq(&pool->lock); + * mutex_unlock(&pool->manager_arb); + * + * put_unbound_pool(): + * mutex_lock(&pool->manager_arb); + * spin_lock_irq(&pool->lock); + * + * + * ... + * wait_for_completion(&pool->detach_completion); + * workers is not empty> + * + * spin_lock_irq(&pool->lock); + * worklist is empty, go to sleep> + * + * No one is going to wake up the manager worker, even so, it won't + * complete(->detach_completion), since it's not a DIE worker. + */ + worker_enter_idle(worker); + spin_unlock_irq(&pool->lock); mutex_unlock(&pool->manager_arb); + spin_lock_irq(&pool->lock); return true; } @@ -2202,6 +2235,7 @@ static int worker_thread(void *__worker) woke_up: spin_lock_irq(&pool->lock); +recheck: /* am I supposed to die? */ if (unlikely(worker->flags & WORKER_DIE)) { spin_unlock_irq(&pool->lock); @@ -2216,7 +2250,6 @@ static int worker_thread(void *__worker) } worker_leave_idle(worker); -recheck: /* no more worker necessary? */ if (!need_more_worker(pool)) goto sleep;