
Commit 3669558

Merge tag 'for-6.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:

 - several fixes for handling directory item (inserting, removing,
   iteration, error handling)

 - fix transaction commit stalls when auto relocation is running and
   blocks other tasks that want to commit

 - fix a build error when DEBUG is enabled

 - fix lockdep warning in inode number lookup ioctl

 - fix race when finishing block group creation

 - remove link to obsolete wiki in several files

* tag 'for-6.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  MAINTAINERS: remove links to obsolete btrfs.wiki.kernel.org
  btrfs: assert delayed node locked when removing delayed item
  btrfs: remove BUG() after failure to insert delayed dir index item
  btrfs: improve error message after failure to add delayed dir index item
  btrfs: fix a compilation error if DEBUG is defined in btree_dirty_folio
  btrfs: check for BTRFS_FS_ERROR in pending ordered assert
  btrfs: fix lockdep splat and potential deadlock after failure running delayed items
  btrfs: do not block starts waiting on previous transaction commit
  btrfs: release path before inode lookup during the ino lookup ioctl
  btrfs: fix race between finishing block group creation and its item update
2 parents 2c758ce + 5facccc commit 3669558

11 files changed, 128 insertions(+), 66 deletions(-)

Documentation/filesystems/btrfs.rst

-1
@@ -37,7 +37,6 @@ For more information please refer to the documentation site or wiki
 
   https://btrfs.readthedocs.io
 
-  https://btrfs.wiki.kernel.org
 
 that maintains information about administration tasks, frequently asked
 questions, use cases, mount options, comprehensible changelogs, features,

MAINTAINERS

-1
@@ -4378,7 +4378,6 @@ M:	David Sterba <dsterba@suse.com>
 L:	linux-btrfs@vger.kernel.org
 S:	Maintained
 W:	https://btrfs.readthedocs.io
-W:	https://btrfs.wiki.kernel.org/
 Q:	https://patchwork.kernel.org/project/linux-btrfs/list/
 C:	irc://irc.libera.chat/btrfs
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git

fs/btrfs/Kconfig

+1 -1

@@ -31,7 +31,7 @@ config BTRFS_FS
 	  continue to be mountable and usable by newer kernels.
 
 	  For more information, please see the web pages at
-	  http://btrfs.wiki.kernel.org.
+	  https://btrfs.readthedocs.io
 
 	  To compile this file system support as a module, choose M here. The
 	  module will be called btrfs.

fs/btrfs/block-group.c

+10 -2

@@ -3028,8 +3028,16 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 fail:
 	btrfs_release_path(path);
-	/* We didn't update the block group item, need to revert @commit_used. */
-	if (ret < 0) {
+	/*
+	 * We didn't update the block group item, need to revert commit_used
+	 * unless the block group item didn't exist yet - this is to prevent a
+	 * race with a concurrent insertion of the block group item, with
+	 * insert_block_group_item(), that happened just after we attempted to
+	 * update. In that case we would reset commit_used to 0 just after the
+	 * insertion set it to a value greater than 0 - if the block group later
+	 * becomes with 0 used bytes, we would incorrectly skip its update.
+	 */
+	if (ret < 0 && ret != -ENOENT) {
 		spin_lock(&cache->lock);
 		cache->commit_used = old_commit_used;
 		spin_unlock(&cache->lock);
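
The hunk above only reverts cache->commit_used when the update failed for a reason other than -ENOENT, so a concurrent insertion of the item is not clobbered. Below is a minimal userspace C sketch of that revert-on-error pattern; the cache_entry type and the stubbed update_item() are hypothetical stand-ins, not btrfs code.

/*
 * Sketch: revert a cached counter only when the update failed for a reason
 * other than "item not found". All names here are hypothetical.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct cache_entry {
	pthread_mutex_t lock;
	unsigned long long commit_used;
};

/* Pretend on-disk update: 0 on success, -ENOENT if the item does not exist
 * yet, another negative errno on real failure. */
static int update_item(struct cache_entry *cache, unsigned long long used)
{
	(void)cache;
	(void)used;
	return -ENOENT;	/* simulate "item not inserted yet" */
}

static void update_cached_item(struct cache_entry *cache,
			       unsigned long long new_used,
			       unsigned long long old_commit_used)
{
	int ret = update_item(cache, new_used);

	/*
	 * Only revert commit_used on a genuine failure: -ENOENT means a
	 * concurrent insert path owns the value and we must not reset it.
	 */
	if (ret < 0 && ret != -ENOENT) {
		pthread_mutex_lock(&cache->lock);
		cache->commit_used = old_commit_used;
		pthread_mutex_unlock(&cache->lock);
	}
}

int main(void)
{
	struct cache_entry cache = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.commit_used = 42,
	};

	update_cached_item(&cache, 100, 0);
	printf("commit_used after -ENOENT: %llu\n", cache.commit_used); /* still 42 */
	return 0;
}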

fs/btrfs/delayed-inode.c

+71 -33

@@ -412,25 +412,29 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root)
 
 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
 {
+	struct btrfs_delayed_node *delayed_node = delayed_item->delayed_node;
 	struct rb_root_cached *root;
 	struct btrfs_delayed_root *delayed_root;
 
 	/* Not inserted, ignore it. */
 	if (RB_EMPTY_NODE(&delayed_item->rb_node))
 		return;
 
-	delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
+	/* If it's in a rbtree, then we need to have delayed node locked. */
+	lockdep_assert_held(&delayed_node->mutex);
+
+	delayed_root = delayed_node->root->fs_info->delayed_root;
 
 	BUG_ON(!delayed_root);
 
 	if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
-		root = &delayed_item->delayed_node->ins_root;
+		root = &delayed_node->ins_root;
 	else
-		root = &delayed_item->delayed_node->del_root;
+		root = &delayed_node->del_root;
 
 	rb_erase_cached(&delayed_item->rb_node, root);
 	RB_CLEAR_NODE(&delayed_item->rb_node);
-	delayed_item->delayed_node->count--;
+	delayed_node->count--;
 
 	finish_one_item(delayed_root);
 }

@@ -1153,20 +1157,33 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
 		ret = __btrfs_commit_inode_delayed_items(trans, path,
 							 curr_node);
 		if (ret) {
-			btrfs_release_delayed_node(curr_node);
-			curr_node = NULL;
 			btrfs_abort_transaction(trans, ret);
 			break;
 		}
 
 		prev_node = curr_node;
 		curr_node = btrfs_next_delayed_node(curr_node);
+		/*
+		 * See the comment below about releasing path before releasing
+		 * node. If the commit of delayed items was successful the path
+		 * should always be released, but in case of an error, it may
+		 * point to locked extent buffers (a leaf at the very least).
+		 */
+		ASSERT(path->nodes[0] == NULL);
 		btrfs_release_delayed_node(prev_node);
 	}
 
+	/*
+	 * Release the path to avoid a potential deadlock and lockdep splat when
+	 * releasing the delayed node, as that requires taking the delayed node's
+	 * mutex. If another task starts running delayed items before we take
+	 * the mutex, it will first lock the mutex and then it may try to lock
+	 * the same btree path (leaf).
+	 */
+	btrfs_free_path(path);
+
 	if (curr_node)
 		btrfs_release_delayed_node(curr_node);
-	btrfs_free_path(path);
 	trans->block_rsv = block_rsv;
 
 	return ret;

@@ -1413,7 +1430,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
 	btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
 }
 
-/* Will return 0 or -ENOMEM */
+static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
+
+	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
+		return;
+
+	/*
+	 * Adding the new dir index item does not require touching another
+	 * leaf, so we can release 1 unit of metadata that was previously
+	 * reserved when starting the transaction. This applies only to
+	 * the case where we had a transaction start and excludes the
+	 * transaction join case (when replaying log trees).
+	 */
+	trace_btrfs_space_reservation(fs_info, "transaction",
+				      trans->transid, bytes, 0);
+	btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
+	ASSERT(trans->bytes_reserved >= bytes);
+	trans->bytes_reserved -= bytes;
+}
+
+/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
 				   const char *name, int name_len,
 				   struct btrfs_inode *dir,

@@ -1455,6 +1494,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
 
 	mutex_lock(&delayed_node->mutex);
 
+	/*
+	 * First attempt to insert the delayed item. This is to make the error
+	 * handling path simpler in case we fail (-EEXIST). There's no risk of
+	 * any other task coming in and running the delayed item before we do
+	 * the metadata space reservation below, because we are holding the
+	 * delayed node's mutex and that mutex must also be locked before the
+	 * node's delayed items can be run.
+	 */
+	ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
+	if (unlikely(ret)) {
+		btrfs_err(trans->fs_info,
+"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
+			  name_len, name, index, btrfs_root_id(delayed_node->root),
+			  delayed_node->inode_id, dir->index_cnt,
+			  delayed_node->index_cnt, ret);
+		btrfs_release_delayed_item(delayed_item);
+		btrfs_release_dir_index_item_space(trans);
+		mutex_unlock(&delayed_node->mutex);
+		goto release_node;
+	}
+
 	if (delayed_node->index_item_leaves == 0 ||
 	    delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
 		delayed_node->curr_index_batch_size = data_len;

@@ -1472,36 +1532,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
 		 * impossible.
 		 */
 		if (WARN_ON(ret)) {
-			mutex_unlock(&delayed_node->mutex);
 			btrfs_release_delayed_item(delayed_item);
+			mutex_unlock(&delayed_node->mutex);
 			goto release_node;
 		}
 
 		delayed_node->index_item_leaves++;
-	} else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
-		const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
-
-		/*
-		 * Adding the new dir index item does not require touching another
-		 * leaf, so we can release 1 unit of metadata that was previously
-		 * reserved when starting the transaction. This applies only to
-		 * the case where we had a transaction start and excludes the
-		 * transaction join case (when replaying log trees).
-		 */
-		trace_btrfs_space_reservation(fs_info, "transaction",
-					      trans->transid, bytes, 0);
-		btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
-		ASSERT(trans->bytes_reserved >= bytes);
-		trans->bytes_reserved -= bytes;
-	}
-
-	ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
-	if (unlikely(ret)) {
-		btrfs_err(trans->fs_info,
-"err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
-			  name_len, name, delayed_node->root->root_key.objectid,
-			  delayed_node->inode_id, ret);
-		BUG();
+	} else {
+		btrfs_release_dir_index_item_space(trans);
 	}
 	mutex_unlock(&delayed_node->mutex);
 
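The comments above describe the lock ordering: the committing task must not hold a locked btree leaf (the path) while taking the delayed node's mutex, because a task running delayed items takes that mutex first and then locks the same leaf. Below is a small userspace sketch of the corrected ordering, with pthread mutexes standing in for the leaf lock and delayed_node->mutex; all names are hypothetical and this is not kernel code.

/*
 * Sketch: drop the "leaf" before taking the "node mutex" so we never hold
 * the leaf while waiting on a mutex that a worker holds while wanting the
 * same leaf.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t leaf_lock = PTHREAD_MUTEX_INITIALIZER;  /* stand-in for the btree leaf */
static pthread_mutex_t node_mutex = PTHREAD_MUTEX_INITIALIZER; /* stand-in for delayed_node->mutex */

/* Worker ordering: node mutex first, then the leaf. */
static void *run_delayed_items(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&node_mutex);
	pthread_mutex_lock(&leaf_lock);
	puts("worker: ran delayed items");
	pthread_mutex_unlock(&leaf_lock);
	pthread_mutex_unlock(&node_mutex);
	return NULL;
}

int main(void)
{
	pthread_t worker;

	pthread_create(&worker, NULL, run_delayed_items, NULL);

	/*
	 * Committer: holding the leaf while taking node_mutex could deadlock
	 * against the worker above, so drop the leaf (the "path") first.
	 */
	pthread_mutex_lock(&leaf_lock);
	puts("committer: committed delayed items");
	pthread_mutex_unlock(&leaf_lock);	/* release the path first ... */

	pthread_mutex_lock(&node_mutex);	/* ... then release the node */
	puts("committer: released delayed node");
	pthread_mutex_unlock(&node_mutex);

	pthread_join(worker, NULL);
	return 0;
}
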
fs/btrfs/disk-io.c

+12 -10

@@ -520,6 +520,7 @@ static bool btree_dirty_folio(struct address_space *mapping,
 				struct folio *folio)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
+	struct btrfs_subpage_info *spi = fs_info->subpage_info;
 	struct btrfs_subpage *subpage;
 	struct extent_buffer *eb;
 	int cur_bit = 0;

@@ -533,18 +534,19 @@ static bool btree_dirty_folio(struct address_space *mapping,
 		btrfs_assert_tree_write_locked(eb);
 		return filemap_dirty_folio(mapping, folio);
 	}
+
+	ASSERT(spi);
 	subpage = folio_get_private(folio);
 
-	ASSERT(subpage->dirty_bitmap);
-	while (cur_bit < BTRFS_SUBPAGE_BITMAP_SIZE) {
+	for (cur_bit = spi->dirty_offset;
+	     cur_bit < spi->dirty_offset + spi->bitmap_nr_bits;
+	     cur_bit++) {
 		unsigned long flags;
 		u64 cur;
-		u16 tmp = (1 << cur_bit);
 
 		spin_lock_irqsave(&subpage->lock, flags);
-		if (!(tmp & subpage->dirty_bitmap)) {
+		if (!test_bit(cur_bit, subpage->bitmaps)) {
 			spin_unlock_irqrestore(&subpage->lock, flags);
-			cur_bit++;
 			continue;
 		}
 		spin_unlock_irqrestore(&subpage->lock, flags);

@@ -557,7 +559,7 @@ static bool btree_dirty_folio(struct address_space *mapping,
 		btrfs_assert_tree_write_locked(eb);
 		free_extent_buffer(eb);
 
-		cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits);
+		cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits) - 1;
 	}
 	return filemap_dirty_folio(mapping, folio);
 }

@@ -1547,7 +1549,7 @@ static int transaction_kthread(void *arg)
 
 		delta = ktime_get_seconds() - cur->start_time;
 		if (!test_and_clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags) &&
-		    cur->state < TRANS_STATE_COMMIT_START &&
+		    cur->state < TRANS_STATE_COMMIT_PREP &&
 		    delta < fs_info->commit_interval) {
 			spin_unlock(&fs_info->trans_lock);
 			delay -= msecs_to_jiffies((delta - 1) * 1000);

@@ -2682,8 +2684,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
 	btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered);
 	btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent);
-	btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start,
-				     BTRFS_LOCKDEP_TRANS_COMMIT_START);
+	btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_prep,
+				     BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
 	btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked,
 				     BTRFS_LOCKDEP_TRANS_UNBLOCKED);
 	btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed,

@@ -4870,7 +4872,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
 	while (!list_empty(&fs_info->trans_list)) {
 		t = list_first_entry(&fs_info->trans_list,
 				     struct btrfs_transaction, list);
-		if (t->state >= TRANS_STATE_COMMIT_START) {
+		if (t->state >= TRANS_STATE_COMMIT_PREP) {
 			refcount_inc(&t->use_count);
 			spin_unlock(&fs_info->trans_lock);
 			btrfs_wait_for_commit(fs_info, t->transid);
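
The fixed loop above walks only the dirty range of the subpage bitmap with test_bit() and, once a dirty extent buffer is found, skips the remaining sectors of that node (the "- 1" leaves the final step to the for loop's cur_bit++). Below is a small userspace sketch of that walk, assuming hypothetical 4K sectors, 16K nodes, and a one-word bitmap; it is an illustration, not the kernel code.

/* Sketch: walk a sub-range of a bitmap and skip whole nodes once hit. */
#include <stdio.h>

#define SECTORSIZE_BITS	12		/* 4K sectors (assumed) */
#define NODESIZE	(16 * 1024)	/* 16K nodes (assumed) */
#define BITMAP_NR_BITS	16		/* 64K folio / 4K sectors */
#define DIRTY_OFFSET	16		/* dirty bits start after other bitmaps */

static int test_bit_ul(int nr, const unsigned long *addr)
{
	return (addr[nr / (8 * sizeof(unsigned long))] >>
		(nr % (8 * sizeof(unsigned long)))) & 1;
}

int main(void)
{
	unsigned long bitmaps[1] = { 0 };
	int cur_bit;

	/* Mark the second 16K node (sectors 4-7 of the dirty range) dirty. */
	for (int i = 4; i < 8; i++)
		bitmaps[0] |= 1UL << (DIRTY_OFFSET + i);

	for (cur_bit = DIRTY_OFFSET;
	     cur_bit < DIRTY_OFFSET + BITMAP_NR_BITS;
	     cur_bit++) {
		if (!test_bit_ul(cur_bit, bitmaps))
			continue;

		printf("dirty extent buffer starts at bit %d\n", cur_bit);
		/*
		 * Skip the rest of this node's sectors; "- 1" because the
		 * for loop's cur_bit++ supplies the last step.
		 */
		cur_bit += (NODESIZE >> SECTORSIZE_BITS) - 1;
	}
	return 0;
}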

fs/btrfs/ioctl.c

+7 -1

@@ -1958,6 +1958,13 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
 				goto out_put;
 			}
 
+			/*
+			 * We don't need the path anymore, so release it and
+			 * avoid deadlocks and lockdep warnings in case
+			 * btrfs_iget() needs to lookup the inode from its root
+			 * btree and lock the same leaf.
+			 */
+			btrfs_release_path(path);
 			temp_inode = btrfs_iget(sb, key2.objectid, root);
 			if (IS_ERR(temp_inode)) {
 				ret = PTR_ERR(temp_inode);

@@ -1978,7 +1985,6 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
 				goto out_put;
 			}
 
-			btrfs_release_path(path);
 			key.objectid = key.offset;
 			key.offset = (u64)-1;
 			dirid = key.objectid;

fs/btrfs/locking.h

+1 -1

@@ -79,7 +79,7 @@ enum btrfs_lock_nesting {
 };
 
 enum btrfs_lockdep_trans_states {
-	BTRFS_LOCKDEP_TRANS_COMMIT_START,
+	BTRFS_LOCKDEP_TRANS_COMMIT_PREP,
 	BTRFS_LOCKDEP_TRANS_UNBLOCKED,
 	BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED,
 	BTRFS_LOCKDEP_TRANS_COMPLETED,

fs/btrfs/ordered-data.c

+1 -1

@@ -639,7 +639,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
 		refcount_inc(&trans->use_count);
 	spin_unlock(&fs_info->trans_lock);
 
-	ASSERT(trans);
+	ASSERT(trans || BTRFS_FS_ERROR(fs_info));
 	if (trans) {
 		if (atomic_dec_and_test(&trans->pending_ordered))
 			wake_up(&trans->pending_wait);
