Skip to content

Commit

Permalink
zram: fix lockdep warning of free block handling
Browse files Browse the repository at this point in the history
[  254.519728] ================================
[  254.520311] WARNING: inconsistent lock state
[  254.520898] 4.19.0+ torvalds#390 Not tainted
[  254.521387] --------------------------------
[  254.521732] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
[  254.521732] zram_verify/2095 [HC0[0]:SC1[1]:HE1:SE0] takes:
[  254.521732] 00000000b1828693 (&(&zram->bitmap_lock)->rlock){+.?.}, at: put_entry_bdev+0x1e/0x50
[  254.521732] {SOFTIRQ-ON-W} state was registered at:
[  254.521732]   _raw_spin_lock+0x2c/0x40
[  254.521732]   zram_make_request+0x755/0xdc9
[  254.521732]   generic_make_request+0x373/0x6a0
[  254.521732]   submit_bio+0x6c/0x140
[  254.521732]   __swap_writepage+0x3a8/0x480
[  254.521732]   shrink_page_list+0x1102/0x1a60
[  254.521732]   shrink_inactive_list+0x21b/0x3f0
[  254.521732]   shrink_node_memcg.constprop.99+0x4f8/0x7e0
[  254.521732]   shrink_node+0x7d/0x2f0
[  254.521732]   do_try_to_free_pages+0xe0/0x300
[  254.521732]   try_to_free_pages+0x116/0x2b0
[  254.521732]   __alloc_pages_slowpath+0x3f4/0xf80
[  254.521732]   __alloc_pages_nodemask+0x2a2/0x2f0
[  254.521732]   __handle_mm_fault+0x42e/0xb50
[  254.521732]   handle_mm_fault+0x55/0xb0
[  254.521732]   __do_page_fault+0x235/0x4b0
[  254.521732]   page_fault+0x1e/0x30
[  254.521732] irq event stamp: 228412
[  254.521732] hardirqs last  enabled at (228412): [<ffffffff98245846>] __slab_free+0x3e6/0x600
[  254.521732] hardirqs last disabled at (228411): [<ffffffff98245625>] __slab_free+0x1c5/0x600
[  254.521732] softirqs last  enabled at (228396): [<ffffffff98e0031e>] __do_softirq+0x31e/0x427
[  254.521732] softirqs last disabled at (228403): [<ffffffff98072051>] irq_exit+0xd1/0xe0
[  254.521732]
[  254.521732] other info that might help us debug this:
[  254.521732]  Possible unsafe locking scenario:
[  254.521732]
[  254.521732]        CPU0
[  254.521732]        ----
[  254.521732]   lock(&(&zram->bitmap_lock)->rlock);
[  254.521732]   <Interrupt>
[  254.521732]     lock(&(&zram->bitmap_lock)->rlock);
[  254.521732]
[  254.521732]  *** DEADLOCK ***
[  254.521732]
[  254.521732] no locks held by zram_verify/2095.
[  254.521732]
[  254.521732] stack backtrace:
[  254.521732] CPU: 5 PID: 2095 Comm: zram_verify Not tainted 4.19.0+ torvalds#390
[  254.521732] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
[  254.521732] Call Trace:
[  254.521732]  <IRQ>
[  254.521732]  dump_stack+0x67/0x9b
[  254.521732]  print_usage_bug+0x1bd/0x1d3
[  254.521732]  mark_lock+0x4aa/0x540
[  254.521732]  ? check_usage_backwards+0x160/0x160
[  254.521732]  __lock_acquire+0x51d/0x1300
[  254.521732]  ? free_debug_processing+0x24e/0x400
[  254.521732]  ? bio_endio+0x6d/0x1a0
[  254.521732]  ? lockdep_hardirqs_on+0x9b/0x180
[  254.521732]  ? lock_acquire+0x90/0x180
[  254.521732]  lock_acquire+0x90/0x180
[  254.521732]  ? put_entry_bdev+0x1e/0x50
[  254.521732]  _raw_spin_lock+0x2c/0x40
[  254.521732]  ? put_entry_bdev+0x1e/0x50
[  254.521732]  put_entry_bdev+0x1e/0x50
[  254.521732]  zram_free_page+0xf6/0x110
[  254.521732]  zram_slot_free_notify+0x42/0xa0
[  254.521732]  end_swap_bio_read+0x5b/0x170
[  254.521732]  blk_update_request+0x8f/0x340
[  254.521732]  scsi_end_request+0x2c/0x1e0
[  254.521732]  scsi_io_completion+0x98/0x650
[  254.521732]  blk_done_softirq+0x9e/0xd0
[  254.521732]  __do_softirq+0xcc/0x427
[  254.521732]  irq_exit+0xd1/0xe0
[  254.521732]  do_IRQ+0x93/0x120
[  254.521732]  common_interrupt+0xf/0xf
[  254.521732]  </IRQ>

With writeback feature, zram_slot_free_notify could be called
in softirq context by end_swap_bio_read. However, bitmap_lock
is not aware of that so lockdep yell out. Thanks.

The problem is not only bitmap_lock but it is also zram_slot_lock
so straightforward solution would disable irq on zram_slot_lock
which covers every bitmap_lock, too.
Although duration disabling the irq is short in many places
zram_slot_lock is used, a place(ie, decompress) is not fast
enough to hold irqlock on relying on compression algorithm
so it's not a option.

The approach in this patch is just "best effort", not guarantee
"freeing orphan zpage". If the zram_slot_lock contention may happen,
kernel couldn't free the zpage until it recycles the block. However,
such contention between zram_slot_free_notify and other places to
hold zram_slot_lock should be very rare in real practice.
To see how often it happens, this patch adds new debug stat
"miss_free".

It also adds irq lock in get/put_block_bdev to prevent deadlock
lockdep reported. The reason I used irq disable rather than bottom
half is swap_slot_free_notify could be called with irq disabled
so it breaks local_bh_enable's rule. The irqlock works on only
writebacked zram slot entry so it should be not frequent lock.

Cc: stable@vger.kernel.org # 4.14+
Signed-off-by: Minchan Kim <minchan@kernel.org>
  • Loading branch information
minchank authored and intel-lab-lkp committed Nov 26, 2018
1 parent 1db4909 commit a3f7d47
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 15 deletions.
56 changes: 41 additions & 15 deletions drivers/block/zram/zram_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ static size_t huge_class_size;

static void zram_free_page(struct zram *zram, size_t index);

static int zram_slot_trylock(struct zram *zram, u32 index)
{
return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].value);
}

static void zram_slot_lock(struct zram *zram, u32 index)
{
bit_spin_lock(ZRAM_LOCK, &zram->table[index].value);
Expand Down Expand Up @@ -443,29 +448,45 @@ static ssize_t backing_dev_store(struct device *dev,

static unsigned long get_entry_bdev(struct zram *zram)
{
unsigned long entry;
unsigned long blk_idx;
unsigned long ret = 0;

spin_lock(&zram->bitmap_lock);
/* skip 0 bit to confuse zram.handle = 0 */
entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
if (entry == zram->nr_pages) {
spin_unlock(&zram->bitmap_lock);
return 0;
blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
if (blk_idx == zram->nr_pages)
goto retry;

spin_lock_irq(&zram->bitmap_lock);
if (test_bit(blk_idx, zram->bitmap)) {
spin_unlock_irq(&zram->bitmap_lock);
goto retry;
}

set_bit(entry, zram->bitmap);
spin_unlock(&zram->bitmap_lock);
set_bit(blk_idx, zram->bitmap);
ret = blk_idx;
goto out;
retry:
spin_lock_irq(&zram->bitmap_lock);
blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
if (blk_idx == zram->nr_pages)
goto out;

set_bit(blk_idx, zram->bitmap);
ret = blk_idx;
out:
spin_unlock_irq(&zram->bitmap_lock);

return entry;
return ret;
}

static void put_entry_bdev(struct zram *zram, unsigned long entry)
{
int was_set;
unsigned long flags;

spin_lock(&zram->bitmap_lock);
spin_lock_irqsave(&zram->bitmap_lock, flags);
was_set = test_and_clear_bit(entry, zram->bitmap);
spin_unlock(&zram->bitmap_lock);
spin_unlock_irqrestore(&zram->bitmap_lock, flags);
WARN_ON_ONCE(!was_set);
}

Expand Down Expand Up @@ -886,9 +907,10 @@ static ssize_t debug_stat_show(struct device *dev,

down_read(&zram->init_lock);
ret = scnprintf(buf, PAGE_SIZE,
"version: %d\n%8llu\n",
"version: %d\n%8llu %8llu\n",
version,
(u64)atomic64_read(&zram->stats.writestall));
(u64)atomic64_read(&zram->stats.writestall),
(u64)atomic64_read(&zram->stats.miss_free));
up_read(&zram->init_lock);

return ret;
Expand Down Expand Up @@ -1400,10 +1422,14 @@ static void zram_slot_free_notify(struct block_device *bdev,

zram = bdev->bd_disk->private_data;

zram_slot_lock(zram, index);
atomic64_inc(&zram->stats.notify_free);
if (!zram_slot_trylock(zram, index)) {
atomic64_inc(&zram->stats.miss_free);
return;
}

zram_free_page(zram, index);
zram_slot_unlock(zram, index);
atomic64_inc(&zram->stats.notify_free);
}

static int zram_rw_page(struct block_device *bdev, sector_t sector,
Expand Down
1 change: 1 addition & 0 deletions drivers/block/zram/zram_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ struct zram_stats {
atomic64_t pages_stored; /* no. of pages currently stored */
atomic_long_t max_used_pages; /* no. of maximum pages stored */
atomic64_t writestall; /* no. of write slow paths */
atomic64_t miss_free; /* no. of missed free */
};

struct zram {
Expand Down

0 comments on commit a3f7d47

Please sign in to comment.