diff --git a/block/blk-core.c b/block/blk-core.c
index bc5e8c5eaac9ff..2c3ca6d405e2c7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -384,6 +384,7 @@ static void blk_timeout_work(struct work_struct *work)
 
 struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
 {
+	static struct lock_class_key __q_usage_counter_key;
 	struct request_queue *q;
 	int error;
 
@@ -441,6 +442,8 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
 			PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
 	if (error)
 		goto fail_stats;
+	lockdep_init_map(&q->q_usage_counter_map, "q->q_usage_counter",
+			 &__q_usage_counter_key, 0);
 
 	q->nr_requests = BLKDEV_DEFAULT_RQ;
 
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 5463697a84428e..d0edac3b8b080e 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -188,6 +188,7 @@ static const char *const hctx_flag_name[] = {
 	HCTX_FLAG_NAME(BLOCKING),
 	HCTX_FLAG_NAME(NO_SCHED),
 	HCTX_FLAG_NAME(NO_SCHED_BY_DEFAULT),
+	HCTX_FLAG_NAME(SKIP_FREEZE_LOCKDEP),
 };
 #undef HCTX_FLAG_NAME
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index bf87b7718b2917..0ed500fa52d49a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -122,7 +122,10 @@ void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
 
 void blk_freeze_queue_start(struct request_queue *q)
 {
+	int sub_class;
+
 	mutex_lock(&q->mq_freeze_lock);
+	sub_class = q->mq_freeze_depth;
 	if (++q->mq_freeze_depth == 1) {
 		percpu_ref_kill(&q->q_usage_counter);
 		mutex_unlock(&q->mq_freeze_lock);
@@ -131,6 +134,12 @@ void blk_freeze_queue_start(struct request_queue *q)
 	} else {
 		mutex_unlock(&q->mq_freeze_lock);
 	}
+	/*
+	 * model as down_write_trylock() so that two concurrent queue
+	 * freezes are allowed
+	 */
+	if (blk_queue_freeze_lockdep(q))
+		rwsem_acquire(&q->q_usage_counter_map, sub_class, 1, _RET_IP_);
 }
 EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
 
@@ -188,6 +197,9 @@ void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
 		wake_up_all(&q->mq_freeze_wq);
 	}
 	mutex_unlock(&q->mq_freeze_lock);
+
+	if (blk_queue_freeze_lockdep(q))
+		rwsem_release(&q->q_usage_counter_map, _RET_IP_);
 }
 
 void blk_mq_unfreeze_queue(struct request_queue *q)
@@ -4241,6 +4253,9 @@ void blk_mq_destroy_queue(struct request_queue *q)
 	blk_queue_start_drain(q);
 	blk_mq_freeze_queue_wait(q);
 
+	/* counterpart of the acquire in blk_queue_start_drain() */
+	if (blk_queue_freeze_lockdep(q))
+		rwsem_release(&q->q_usage_counter_map, _RET_IP_);
 	blk_sync_queue(q);
 	blk_mq_cancel_work_sync(q);
 	blk_mq_exit_queue(q);
diff --git a/block/blk.h b/block/blk.h
index 8fddaf6eae49da..8781624159832b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -4,6 +4,7 @@
 
 #include <linux/bio-integrity.h>
 #include <linux/blk-crypto.h>
+#include <linux/lockdep.h>
 #include <linux/memblock.h>	/* for max_pfn/max_low_pfn */
 #include <linux/sched/sysctl.h>
 #include <linux/timekeeping.h>
@@ -43,6 +44,8 @@ void bio_await_chain(struct bio *bio);
 
 static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
 {
+	/* model as down_read() for lockdep */
+	rwsem_acquire_read(&q->q_usage_counter_map, 0, 0, _RET_IP_);
 	rcu_read_lock();
 	if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter))
 		goto fail;
@@ -56,12 +59,18 @@ static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
 		goto fail_put;
 
 	rcu_read_unlock();
+	/*
+	 * queue exit often happens in another context, so simply annotate
+	 * the release here; this still covers lots of cases
+	 */
+	rwsem_release(&q->q_usage_counter_map, _RET_IP_);
 	return true;
 
 fail_put:
 	blk_queue_exit(q);
 fail:
 	rcu_read_unlock();
+	rwsem_release(&q->q_usage_counter_map, _RET_IP_);
 	return false;
 }
 
diff --git a/block/genhd.c b/block/genhd.c
index bc30eee7ab16f0..52050516ffcde2 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -742,6 +742,9 @@ void del_gendisk(struct gendisk *disk)
 		blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
 		__blk_mq_unfreeze_queue(q, true);
 	} else {
+		/* counterpart of the acquire in blk_queue_start_drain() */
+		if (blk_queue_freeze_lockdep(q))
+			rwsem_release(&q->q_usage_counter_map, _RET_IP_);
 		if (queue_is_mq(q))
 			blk_mq_exit_queue(q);
 	}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 43d73d31c66f36..9c422bfd2bfb19 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4528,7 +4528,7 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
 	/* Reserved for fabric connect and keep alive */
 	set->reserved_tags = 2;
 	set->numa_node = ctrl->numa_node;
-	set->flags = BLK_MQ_F_NO_SCHED;
+	set->flags = BLK_MQ_F_NO_SCHED | BLK_MQ_F_SKIP_FREEZE_LOCKDEP;
 	if (ctrl->ops->flags & NVME_F_BLOCKING)
 		set->flags |= BLK_MQ_F_BLOCKING;
 	set->cmd_size = cmd_size;
@@ -4598,7 +4598,7 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
 	/* Reserved for fabric connect */
 	set->reserved_tags = 1;
 	set->numa_node = ctrl->numa_node;
-	set->flags = BLK_MQ_F_SHOULD_MERGE;
+	set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SKIP_FREEZE_LOCKDEP;
 	if (ctrl->ops->flags & NVME_F_BLOCKING)
 		set->flags |= BLK_MQ_F_BLOCKING;
 	set->cmd_size = cmd_size;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 59e9adf815a491..5662ab4d38869e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -687,7 +687,10 @@ enum {
 	 * or shared hwqs instead of 'mq-deadline'.
 	 */
 	BLK_MQ_F_NO_SCHED_BY_DEFAULT	= 1 << 6,
-	BLK_MQ_F_ALLOC_POLICY_START_BIT = 7,
+
+	BLK_MQ_F_SKIP_FREEZE_LOCKDEP	= 1 << 7,
+
+	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
 	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
 };
 #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6b78a68e0bd9c6..5a8e5a7370a450 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -25,6 +25,7 @@
 #include <linux/uuid.h>
 #include <linux/xarray.h>
 #include <linux/file.h>
+#include <linux/lockdep.h>
 
 struct module;
 struct request_queue;
@@ -474,6 +475,9 @@ struct request_queue {
 	struct xarray		hctx_table;
 
 	struct percpu_ref	q_usage_counter;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	q_usage_counter_map;
+#endif
 
 	struct request		*last_merge;
 
@@ -640,6 +644,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
 #define blk_queue_sq_sched(q)	test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
 #define blk_queue_skip_tagset_quiesce(q) \
 	((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE)
+#define blk_queue_freeze_lockdep(q) \
+	(!((q)->tag_set->flags & BLK_MQ_F_SKIP_FREEZE_LOCKDEP))
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
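
For readers unfamiliar with raw lockdep annotations, the scheme above models entering the queue (blk_try_enter_queue) as down_read() and freezing it (blk_freeze_queue_start) as a nested down_write_trylock() on a virtual rwsem, so lockdep can fold freeze/enter into its dependency graph and report inverted ordering against other locks (for example, freezing a queue while holding a lock that another path takes while inside the queue). The following is a minimal sketch of the same pattern applied to a hypothetical object; every my_* identifier is illustrative and not part of this patch, and the real percpu_ref plumbing (draining, wait queues, atomic/percpu mode switching) is elided:

/*
 * Sketch only: model a custom "enter vs. freeze" exclusion with a
 * lockdep_map, mirroring the q_usage_counter annotation above.
 * All my_* names are hypothetical; the lockdep and percpu_ref calls
 * are real kernel APIs.
 */
#include <linux/gfp.h>
#include <linux/lockdep.h>
#include <linux/percpu-refcount.h>

struct my_dev {
	struct percpu_ref	users;		/* the real synchronization */
	struct lockdep_map	users_map;	/* lockdep model of it only */
};

static int my_dev_init(struct my_dev *d, percpu_ref_func_t *release)
{
	static struct lock_class_key key;

	lockdep_init_map(&d->users_map, "my_dev->users", &key, 0);
	return percpu_ref_init(&d->users, release, 0, GFP_KERNEL);
}

/* Modeled as down_read(); fails once a freeze has killed the ref. */
static bool my_dev_enter(struct my_dev *d)
{
	if (!percpu_ref_tryget_live(&d->users))
		return false;
	rwsem_acquire_read(&d->users_map, 0, 0, _RET_IP_);
	return true;
}

static void my_dev_exit(struct my_dev *d)
{
	rwsem_release(&d->users_map, _RET_IP_);
	percpu_ref_put(&d->users);
}

/*
 * Modeled as down_write_trylock() (third argument == 1), matching the
 * annotation in blk_freeze_queue_start() above, so two contexts that
 * freeze concurrently do not trigger a false positive.
 */
static void my_dev_freeze(struct my_dev *d)
{
	percpu_ref_kill(&d->users);
	rwsem_acquire(&d->users_map, 0, 1, _RET_IP_);
	/* ... wait for all my_dev_enter() users to drain ... */
}

static void my_dev_unfreeze(struct my_dev *d)
{
	rwsem_release(&d->users_map, _RET_IP_);
	percpu_ref_resurrect(&d->users);
}

One deliberate difference from this sketch: the patch releases the read acquisition inside blk_try_enter_queue() as soon as the enter succeeds, because blk_queue_exit() often runs in a different context, where releasing a lock acquired by another task would confuse lockdep. The short-lived acquire/release pair around the enter attempt still records the dependency edges lockdep needs, which is why the comment in blk.h notes that lots of cases remain covered.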