diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index ea6c81f9d1a36e..54cfab7394d377 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -431,6 +431,7 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
 		u64 zone_start = round_down(physical, dev->fs_info->zone_size);
 
 		ASSERT(btrfs_dev_is_sequential(dev, physical));
+		btrfs_bio(bio)->orig_physical = physical;
 		bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
 	}
 	btrfs_debug_in_rcu(dev->fs_info,
@@ -685,6 +686,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
 			ret = btrfs_bio_csum(bbio);
 			if (ret)
 				goto fail_put_bio;
+		} else if (use_append) {
+			ret = btrfs_alloc_dummy_sum(bbio);
+			if (ret)
+				goto fail_put_bio;
 		}
 	}
 
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index a8eca3a6567320..8a29980159b404 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -39,8 +39,8 @@ struct btrfs_bio {
 
 	union {
 		/*
-		 * Data checksumming and original I/O information for internal
-		 * use in the btrfs_submit_bio machinery.
+		 * For data reads: checksumming and original I/O information.
+		 * (for internal use in the btrfs_submit_bio machinery only)
 		 */
 		struct {
 			u8 *csum;
@@ -48,7 +48,18 @@ struct btrfs_bio {
 			struct bvec_iter saved_iter;
 		};
 
-		/* For metadata parentness verification. */
+		/*
+		 * For data writes:
+		 * - pointer to the checksums for this bio
+		 * - original physical address from the allocator
+		 *   (for zone append only)
+		 */
+		struct {
+			struct btrfs_ordered_sum *sums;
+			u64 orig_physical;
+		};
+
+		/* For metadata reads: parentness verification. */
 		struct btrfs_tree_parent_check parent_check;
 	};
 
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 415e50904db311..0cb4a9921d21ed 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -818,11 +818,44 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
 
 	}
 	this_sum_bytes = 0;
+
+	/*
+	 * The ->sums assignment is for zoned writes, where a bio never spans
+	 * ordered extents and is only done unconditionally because that's cheaper
+	 * than a branch.
+	 */
+	bbio->sums = sums;
 	btrfs_add_ordered_sum(ordered, sums);
 	btrfs_put_ordered_extent(ordered);
 	return 0;
 }
 
+/*
+ * Nodatasum I/O on zoned file systems still requires a btrfs_ordered_sum to
+ * record the updated logical address on Zone Append completion.
+ * Allocate just the structure with an empty sums array here for that case.
+ */
+blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio)
+{
+	struct btrfs_ordered_extent *ordered =
+		btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
+
+	if (WARN_ON_ONCE(!ordered))
+		return BLK_STS_IOERR;
+
+	bbio->sums = kmalloc(sizeof(*bbio->sums), GFP_NOFS);
+	if (!bbio->sums) {
+		/* Drop the reference taken by the lookup above. */
+		btrfs_put_ordered_extent(ordered);
+		return BLK_STS_RESOURCE;
+	}
+	bbio->sums->len = bbio->bio.bi_iter.bi_size;
+	bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+	btrfs_add_ordered_sum(ordered, bbio->sums);
+	btrfs_put_ordered_extent(ordered);
+	return 0;
+}
+
 /*
  * Remove one checksum overlapping a range.
  *
diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h
index 6be8725cd57474..4ec669b690080a 100644
--- a/fs/btrfs/file-item.h
+++ b/fs/btrfs/file-item.h
@@ -50,6 +50,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct btrfs_ordered_sum *sums);
 blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio);
+blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 			     struct list_head *list, int search_commit,
 			     bool nowait);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ad8196f31cdb3a..31c5b7c176d366 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3301,14 +3301,10 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 		goto out;
 	}
 
-	/* A valid ->physical implies a write on a sequential zone. */
-	if (ordered_extent->physical != (u64)-1) {
+	if (btrfs_is_zoned(fs_info)) {
 		btrfs_rewrite_logical_zoned(ordered_extent);
 		btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
 					ordered_extent->disk_num_bytes);
-	} else if (btrfs_is_data_reloc_root(inode->root)) {
-		btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
-					ordered_extent->disk_num_bytes);
 	}
 
 	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a9778a91511e19..324a5a8c844a72 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -209,7 +209,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
 	entry->compress_type = compress_type;
 	entry->truncated_len = (u64)-1;
 	entry->qgroup_rsv = ret;
-	entry->physical = (u64)-1;
 
 	ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
 	entry->flags = flags;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index ebc980ac967ad4..dc700aa515b58b 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -151,12 +151,6 @@ struct btrfs_ordered_extent {
 	struct completion completion;
 	struct btrfs_work flush_work;
 	struct list_head work_list;
-
-	/*
-	 * Used to reverse-map physical address returned from ZONE_APPEND write
-	 * command in a workqueue context
-	 */
-	u64 physical;
 };
 
 static inline void
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index eca49e6e0e5f50..b55b0d4ee86f85 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1657,51 +1657,37 @@ bool btrfs_use_zone_append(struct btrfs_bio *bbio)
 void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
 {
 	const u64 physical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
-	struct btrfs_ordered_extent *ordered;
+	struct btrfs_ordered_sum *sum = bbio->sums;
 
-	ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
-	if (WARN_ON(!ordered))
-		return;
-
-	ordered->physical = physical;
-	btrfs_put_ordered_extent(ordered);
+	if (physical < bbio->orig_physical)
+		sum->logical -= bbio->orig_physical - physical;
+	else
+		sum->logical += physical - bbio->orig_physical;
 }
 
 void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
 {
 	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
-	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	struct extent_map_tree *em_tree;
+	struct extent_map_tree *em_tree = &inode->extent_tree;
 	struct extent_map *em;
 	struct btrfs_ordered_sum *sum;
-	u64 orig_logical = ordered->disk_bytenr;
-	struct map_lookup *map;
-	u64 physical = ordered->physical;
-	u64 chunk_start_phys;
 	u64 logical;
 
-	em = btrfs_get_chunk_map(fs_info, orig_logical, 1);
-	if (IS_ERR(em))
-		return;
-	map = em->map_lookup;
-	chunk_start_phys = map->stripes[0].physical;
-
-	if (WARN_ON_ONCE(map->num_stripes > 1) ||
-	    WARN_ON_ONCE((map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) ||
-	    WARN_ON_ONCE(physical < chunk_start_phys) ||
-	    WARN_ON_ONCE(physical > chunk_start_phys + em->orig_block_len)) {
-		free_extent_map(em);
-		return;
-	}
-	logical = em->start + (physical - map->stripes[0].physical);
-	free_extent_map(em);
-
-	if (orig_logical == logical)
-		return;
+	/*
+	 * An ordered extent whose bios were neither checksummed nor submitted
+	 * with Zone Append has no btrfs_ordered_sum attached, so there is no
+	 * recorded logical address to rewrite to and nothing to free.
+	 */
+	if (list_empty(&ordered->list))
+		return;
+
+	sum = list_first_entry(&ordered->list, typeof(*sum), list);
+	logical = sum->logical;
+
+	if (ordered->disk_bytenr == logical)
+		goto out;
 
 	ordered->disk_bytenr = logical;
 
-	em_tree = &inode->extent_tree;
 	write_lock(&em_tree->lock);
 	em = search_extent_mapping(em_tree, ordered->file_offset,
 				   ordered->num_bytes);
@@ -1709,10 +1695,16 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
 	free_extent_map(em);
 	write_unlock(&em_tree->lock);
 
-	list_for_each_entry(sum, &ordered->list, list) {
-		if (logical < orig_logical)
-			sum->logical -= orig_logical - logical;
-		else
-			sum->logical += logical - orig_logical;
+out:
+	/*
+	 * If we end up here for nodatasum I/O, the btrfs_ordered_sum structures
+	 * were allocated by btrfs_alloc_dummy_sum only to record the logical
+	 * addresses and don't contain actual checksums. We thus must free them
+	 * here so that we don't attempt to log the csums later.
+	 */
+	if ((inode->flags & BTRFS_INODE_NODATASUM) ||
+	    test_bit(BTRFS_FS_STATE_NO_CSUMS, &inode->root->fs_info->fs_state)) {
+		list_del(&sum->list);
+		kfree(sum);
 	}
 }