diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index d96561d1ce9087..6934a5b8708fed 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2017,8 +2017,17 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags) return flags; } -int btrfs_inc_block_group_ro(struct btrfs_block_group *cache) - +/* + * Mark one block group RO, can be called several times for the same block + * group. + * + * @cache: the destination block group + * @do_chunk_alloc: whether need to do chunk pre-allocation, this is to + * ensure we still have some free space after marking this + * block group RO. + */ +int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, + bool do_chunk_alloc) { struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_trans_handle *trans; @@ -2048,25 +2057,29 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache) goto again; } - /* - * if we are changing raid levels, try to allocate a corresponding - * block group with the new raid level. - */ - alloc_flags = update_block_group_flags(fs_info, cache->flags); - if (alloc_flags != cache->flags) { - ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); + if (do_chunk_alloc) { /* - * ENOSPC is allowed here, we may have enough space - * already allocated at the new raid level to - * carry on + * If we are changing raid levels, try to allocate a + * corresponding block group with the new raid level. */ - if (ret == -ENOSPC) - ret = 0; - if (ret < 0) - goto out; + alloc_flags = update_block_group_flags(fs_info, cache->flags); + if (alloc_flags != cache->flags) { + ret = btrfs_chunk_alloc(trans, alloc_flags, + CHUNK_ALLOC_FORCE); + /* + * ENOSPC is allowed here, we may have enough space + * already allocated at the new raid level to carry on + */ + if (ret == -ENOSPC) + ret = 0; + if (ret < 0) + goto out; + } } - ret = inc_block_group_ro(cache, 0); + ret = inc_block_group_ro(cache, !do_chunk_alloc); + if (!do_chunk_alloc) + goto unlock_out; if (!ret) goto out; alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags); @@ -2081,6 +2094,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache) check_system_chunk(trans, alloc_flags); mutex_unlock(&fs_info->chunk_mutex); } +unlock_out: mutex_unlock(&fs_info->ro_block_group_mutex); btrfs_end_transaction(trans); diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 4e7afc02879143..9b409676c4b2bf 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -206,7 +206,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info); int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, u64 type, u64 chunk_offset, u64 size); void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans); -int btrfs_inc_block_group_ro(struct btrfs_block_group *cache); +int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, + bool do_chunk_alloc); void btrfs_dec_block_group_ro(struct btrfs_block_group *cache); int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index a857fc8271d270..2e16701c6099e8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4325,7 +4325,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) rc->extent_root = extent_root; rc->block_group = bg; - ret = btrfs_inc_block_group_ro(rc->block_group); + ret = btrfs_inc_block_group_ro(rc->block_group, true); if (ret) { err = ret; goto out; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index e2c87220600f40..bd3f2266c5f49e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3559,7 +3559,26 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, * -> btrfs_scrub_pause() */ scrub_pause_on(fs_info); - ret = btrfs_inc_block_group_ro(cache); + + /* + * Don't do chunk preallocation for scrub. + * + * This is especially important for SYSTEM bgs, or we can hit + * -EFBIG from btrfs_finish_chunk_alloc() like: + * 1. The only SYSTEM bg is marked RO. + * Since SYSTEM bg is small, that's pretty common. + * 2. New SYSTEM bg will be allocated + * Due to regular version will allocate new chunk. + * 3. New SYSTEM bg is empty and will get cleaned up + * Before cleanup really happens, it's marked RO again. + * 4. Empty SYSTEM bg get scrubbed + * We go back to 2. + * + * This can easily boost the amount of SYSTEM chunks if cleaner + * thread can't be triggered fast enough, and use up all space + * of btrfs_super_block::sys_chunk_array + */ + ret = btrfs_inc_block_group_ro(cache, false); if (!ret && sctx->is_dev_replace) { /* * If we are doing a device replace wait for any tasks