Skip to content

Commit 169e0da

Browse files
naota authored and kdave committed
btrfs: zoned: track unusable bytes for zones
In a zoned filesystem a once written then freed region is not usable until the underlying zone has been reset. So we need to distinguish such unusable space from usable free space. Therefore we need to introduce the "zone_unusable" field to the block group structure, and "bytes_zone_unusable" to the space_info structure to track the unusable space. Pinned bytes are always reclaimed to the unusable space. But, when an allocated region is returned before use, e.g. when the block group becomes read-only between allocation time and reservation time, we can safely return the region to the block group. For this situation, this commit introduces "btrfs_add_free_space_unused". This behaves the same as btrfs_add_free_space() on regular filesystems. On zoned filesystems, it rewinds the allocation offset. Because the read-only bytes track free but unusable bytes when the block group is read-only, we need to migrate the zone_unusable bytes to read-only bytes when a block group is marked read-only. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> Signed-off-by: David Sterba <dsterba@suse.com>
1 parent a94794d commit 169e0da

File tree

10 files changed

+151
-18
lines changed

10 files changed

+151
-18
lines changed

fs/btrfs/block-group.c

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,12 +1009,17 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10091009
WARN_ON(block_group->space_info->total_bytes
10101010
< block_group->length);
10111011
WARN_ON(block_group->space_info->bytes_readonly
1012-
< block_group->length);
1012+
< block_group->length - block_group->zone_unusable);
1013+
WARN_ON(block_group->space_info->bytes_zone_unusable
1014+
< block_group->zone_unusable);
10131015
WARN_ON(block_group->space_info->disk_total
10141016
< block_group->length * factor);
10151017
}
10161018
block_group->space_info->total_bytes -= block_group->length;
1017-
block_group->space_info->bytes_readonly -= block_group->length;
1019+
block_group->space_info->bytes_readonly -=
1020+
(block_group->length - block_group->zone_unusable);
1021+
block_group->space_info->bytes_zone_unusable -=
1022+
block_group->zone_unusable;
10181023
block_group->space_info->disk_total -= block_group->length * factor;
10191024

10201025
spin_unlock(&block_group->space_info->lock);
@@ -1158,7 +1163,7 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
11581163
}
11591164

11601165
num_bytes = cache->length - cache->reserved - cache->pinned -
1161-
cache->bytes_super - cache->used;
1166+
cache->bytes_super - cache->zone_unusable - cache->used;
11621167

11631168
/*
11641169
* Data never overcommits, even in mixed mode, so do just the straight
@@ -1189,6 +1194,12 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
11891194

11901195
if (!ret) {
11911196
sinfo->bytes_readonly += num_bytes;
1197+
if (btrfs_is_zoned(cache->fs_info)) {
1198+
/* Migrate zone_unusable bytes to readonly */
1199+
sinfo->bytes_readonly += cache->zone_unusable;
1200+
sinfo->bytes_zone_unusable -= cache->zone_unusable;
1201+
cache->zone_unusable = 0;
1202+
}
11921203
cache->ro++;
11931204
list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
11941205
}
@@ -1876,12 +1887,20 @@ static int read_one_block_group(struct btrfs_fs_info *info,
18761887
}
18771888

18781889
/*
1879-
* Check for two cases, either we are full, and therefore don't need
1880-
* to bother with the caching work since we won't find any space, or we
1881-
* are empty, and we can just add all the space in and be done with it.
1882-
* This saves us _a_lot_ of time, particularly in the full case.
1890+
* For zoned filesystem, space after the allocation offset is the only
1891+
* free space for a block group. So, we don't need any caching work.
1892+
* btrfs_calc_zone_unusable() will set the amount of free space and
1893+
* zone_unusable space.
1894+
*
1895+
* For regular filesystem, check for two cases, either we are full, and
1896+
* therefore don't need to bother with the caching work since we won't
1897+
* find any space, or we are empty, and we can just add all the space
1898+
* in and be done with it. This saves us _a_lot_ of time, particularly
1899+
* in the full case.
18831900
*/
1884-
if (cache->length == cache->used) {
1901+
if (btrfs_is_zoned(info)) {
1902+
btrfs_calc_zone_unusable(cache);
1903+
} else if (cache->length == cache->used) {
18851904
cache->last_byte_to_unpin = (u64)-1;
18861905
cache->cached = BTRFS_CACHE_FINISHED;
18871906
btrfs_free_excluded_extents(cache);
@@ -1900,7 +1919,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
19001919
}
19011920
trace_btrfs_add_block_group(info, cache, 0);
19021921
btrfs_update_space_info(info, cache->flags, cache->length,
1903-
cache->used, cache->bytes_super, &space_info);
1922+
cache->used, cache->bytes_super,
1923+
cache->zone_unusable, &space_info);
19041924

19051925
cache->space_info = space_info;
19061926

@@ -1956,7 +1976,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
19561976
break;
19571977
}
19581978
btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
1959-
0, &space_info);
1979+
0, 0, &space_info);
19601980
bg->space_info = space_info;
19611981
link_block_group(bg);
19621982

@@ -2197,7 +2217,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
21972217
*/
21982218
trace_btrfs_add_block_group(fs_info, cache, 1);
21992219
btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
2200-
cache->bytes_super, &cache->space_info);
2220+
cache->bytes_super, 0, &cache->space_info);
22012221
btrfs_update_global_block_rsv(fs_info);
22022222

22032223
link_block_group(cache);
@@ -2305,8 +2325,15 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
23052325
spin_lock(&cache->lock);
23062326
if (!--cache->ro) {
23072327
num_bytes = cache->length - cache->reserved -
2308-
cache->pinned - cache->bytes_super - cache->used;
2328+
cache->pinned - cache->bytes_super -
2329+
cache->zone_unusable - cache->used;
23092330
sinfo->bytes_readonly -= num_bytes;
2331+
if (btrfs_is_zoned(cache->fs_info)) {
2332+
/* Migrate zone_unusable bytes back */
2333+
cache->zone_unusable = cache->alloc_offset - cache->used;
2334+
sinfo->bytes_zone_unusable += cache->zone_unusable;
2335+
sinfo->bytes_readonly -= cache->zone_unusable;
2336+
}
23102337
list_del_init(&cache->ro_list);
23112338
}
23122339
spin_unlock(&cache->lock);

fs/btrfs/block-group.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ struct btrfs_block_group {
189189
* allocation. This is used only on a zoned filesystem.
190190
*/
191191
u64 alloc_offset;
192+
u64 zone_unusable;
192193
};
193194

194195
static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)

fs/btrfs/extent-tree.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "block-group.h"
3535
#include "discard.h"
3636
#include "rcu-string.h"
37+
#include "zoned.h"
3738

3839
#undef SCRAMBLE_DELAYED_REFS
3940

@@ -2740,6 +2741,10 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
27402741
if (cache->ro) {
27412742
space_info->bytes_readonly += len;
27422743
readonly = true;
2744+
} else if (btrfs_is_zoned(fs_info)) {
2745+
/* Need reset before reusing in a zoned block group */
2746+
space_info->bytes_zone_unusable += len;
2747+
readonly = true;
27432748
}
27442749
spin_unlock(&cache->lock);
27452750
if (!readonly && return_free_space &&

fs/btrfs/free-space-cache.c

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2477,6 +2477,8 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
24772477
int ret = 0;
24782478
u64 filter_bytes = bytes;
24792479

2480+
ASSERT(!btrfs_is_zoned(fs_info));
2481+
24802482
info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
24812483
if (!info)
24822484
return -ENOMEM;
@@ -2534,11 +2536,49 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
25342536
return ret;
25352537
}
25362538

2539+
static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
2540+
u64 bytenr, u64 size, bool used)
2541+
{
2542+
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2543+
u64 offset = bytenr - block_group->start;
2544+
u64 to_free, to_unusable;
2545+
2546+
spin_lock(&ctl->tree_lock);
2547+
if (!used)
2548+
to_free = size;
2549+
else if (offset >= block_group->alloc_offset)
2550+
to_free = size;
2551+
else if (offset + size <= block_group->alloc_offset)
2552+
to_free = 0;
2553+
else
2554+
to_free = offset + size - block_group->alloc_offset;
2555+
to_unusable = size - to_free;
2556+
2557+
ctl->free_space += to_free;
2558+
block_group->zone_unusable += to_unusable;
2559+
spin_unlock(&ctl->tree_lock);
2560+
if (!used) {
2561+
spin_lock(&block_group->lock);
2562+
block_group->alloc_offset -= size;
2563+
spin_unlock(&block_group->lock);
2564+
}
2565+
2566+
/* All the region is now unusable. Mark it as unused and reclaim */
2567+
if (block_group->zone_unusable == block_group->length)
2568+
btrfs_mark_bg_unused(block_group);
2569+
2570+
return 0;
2571+
}
2572+
25372573
int btrfs_add_free_space(struct btrfs_block_group *block_group,
25382574
u64 bytenr, u64 size)
25392575
{
25402576
enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
25412577

2578+
if (btrfs_is_zoned(block_group->fs_info))
2579+
return __btrfs_add_free_space_zoned(block_group, bytenr, size,
2580+
true);
2581+
25422582
if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
25432583
trim_state = BTRFS_TRIM_STATE_TRIMMED;
25442584

@@ -2547,6 +2587,16 @@ int btrfs_add_free_space(struct btrfs_block_group *block_group,
25472587
bytenr, size, trim_state);
25482588
}
25492589

2590+
int btrfs_add_free_space_unused(struct btrfs_block_group *block_group,
2591+
u64 bytenr, u64 size)
2592+
{
2593+
if (btrfs_is_zoned(block_group->fs_info))
2594+
return __btrfs_add_free_space_zoned(block_group, bytenr, size,
2595+
false);
2596+
2597+
return btrfs_add_free_space(block_group, bytenr, size);
2598+
}
2599+
25502600
/*
25512601
* This is a subtle distinction because when adding free space back in general,
25522602
* we want it to be added as untrimmed for async. But in the case where we add
@@ -2557,6 +2607,10 @@ int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
25572607
{
25582608
enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
25592609

2610+
if (btrfs_is_zoned(block_group->fs_info))
2611+
return __btrfs_add_free_space_zoned(block_group, bytenr, size,
2612+
true);
2613+
25602614
if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
25612615
btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
25622616
trim_state = BTRFS_TRIM_STATE_TRIMMED;
@@ -2574,6 +2628,9 @@ int btrfs_remove_free_space(struct btrfs_block_group *block_group,
25742628
int ret;
25752629
bool re_search = false;
25762630

2631+
if (btrfs_is_zoned(block_group->fs_info))
2632+
return 0;
2633+
25772634
spin_lock(&ctl->tree_lock);
25782635

25792636
again:
@@ -2668,6 +2725,16 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
26682725
struct rb_node *n;
26692726
int count = 0;
26702727

2728+
/*
2729+
* Zoned btrfs does not use free space tree and cluster. Just print
2730+
* out the free space after the allocation offset.
2731+
*/
2732+
if (btrfs_is_zoned(fs_info)) {
2733+
btrfs_info(fs_info, "free space %llu",
2734+
block_group->length - block_group->alloc_offset);
2735+
return;
2736+
}
2737+
26712738
spin_lock(&ctl->tree_lock);
26722739
for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
26732740
info = rb_entry(n, struct btrfs_free_space, offset_index);

fs/btrfs/free-space-cache.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
107107
enum btrfs_trim_state trim_state);
108108
int btrfs_add_free_space(struct btrfs_block_group *block_group,
109109
u64 bytenr, u64 size);
110+
int btrfs_add_free_space_unused(struct btrfs_block_group *block_group,
111+
u64 bytenr, u64 size);
110112
int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
111113
u64 bytenr, u64 size);
112114
int btrfs_remove_free_space(struct btrfs_block_group *block_group,

fs/btrfs/space-info.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
169169
ASSERT(s_info);
170170
return s_info->bytes_used + s_info->bytes_reserved +
171171
s_info->bytes_pinned + s_info->bytes_readonly +
172+
s_info->bytes_zone_unusable +
172173
(may_use_included ? s_info->bytes_may_use : 0);
173174
}
174175

@@ -264,7 +265,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
264265

265266
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
266267
u64 total_bytes, u64 bytes_used,
267-
u64 bytes_readonly,
268+
u64 bytes_readonly, u64 bytes_zone_unusable,
268269
struct btrfs_space_info **space_info)
269270
{
270271
struct btrfs_space_info *found;
@@ -280,6 +281,7 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
280281
found->bytes_used += bytes_used;
281282
found->disk_used += bytes_used * factor;
282283
found->bytes_readonly += bytes_readonly;
284+
found->bytes_zone_unusable += bytes_zone_unusable;
283285
if (total_bytes > 0)
284286
found->full = 0;
285287
btrfs_try_granting_tickets(info, found);
@@ -429,10 +431,10 @@ static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
429431
info->total_bytes - btrfs_space_info_used(info, true),
430432
info->full ? "" : "not ");
431433
btrfs_info(fs_info,
432-
"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
434+
"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu zone_unusable=%llu",
433435
info->total_bytes, info->bytes_used, info->bytes_pinned,
434436
info->bytes_reserved, info->bytes_may_use,
435-
info->bytes_readonly);
437+
info->bytes_readonly, info->bytes_zone_unusable);
436438

437439
DUMP_BLOCK_RSV(fs_info, global_block_rsv);
438440
DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
@@ -461,9 +463,10 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
461463
list_for_each_entry(cache, &info->block_groups[index], list) {
462464
spin_lock(&cache->lock);
463465
btrfs_info(fs_info,
464-
"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
466+
"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu zone_unusable %s",
465467
cache->start, cache->length, cache->used, cache->pinned,
466-
cache->reserved, cache->ro ? "[readonly]" : "");
468+
cache->reserved, cache->zone_unusable,
469+
cache->ro ? "[readonly]" : "");
467470
spin_unlock(&cache->lock);
468471
btrfs_dump_free_space(cache, bytes);
469472
}

fs/btrfs/space-info.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ struct btrfs_space_info {
1717
u64 bytes_may_use; /* number of bytes that may be used for
1818
delalloc/allocations */
1919
u64 bytes_readonly; /* total bytes that are read only */
20+
u64 bytes_zone_unusable; /* total bytes that are unusable until
21+
resetting the device zone */
2022

2123
u64 max_extent_size; /* This will hold the maximum extent size of
2224
the space info if we had an ENOSPC in the
@@ -123,7 +125,7 @@ DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned");
123125
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
124126
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
125127
u64 total_bytes, u64 bytes_used,
126-
u64 bytes_readonly,
128+
u64 bytes_readonly, u64 bytes_zone_unusable,
127129
struct btrfs_space_info **space_info);
128130
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
129131
u64 flags);

fs/btrfs/sysfs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,7 @@ SPACE_INFO_ATTR(bytes_pinned);
666666
SPACE_INFO_ATTR(bytes_reserved);
667667
SPACE_INFO_ATTR(bytes_may_use);
668668
SPACE_INFO_ATTR(bytes_readonly);
669+
SPACE_INFO_ATTR(bytes_zone_unusable);
669670
SPACE_INFO_ATTR(disk_used);
670671
SPACE_INFO_ATTR(disk_total);
671672
BTRFS_ATTR(space_info, total_bytes_pinned,
@@ -679,6 +680,7 @@ static struct attribute *space_info_attrs[] = {
679680
BTRFS_ATTR_PTR(space_info, bytes_reserved),
680681
BTRFS_ATTR_PTR(space_info, bytes_may_use),
681682
BTRFS_ATTR_PTR(space_info, bytes_readonly),
683+
BTRFS_ATTR_PTR(space_info, bytes_zone_unusable),
682684
BTRFS_ATTR_PTR(space_info, disk_used),
683685
BTRFS_ATTR_PTR(space_info, disk_total),
684686
BTRFS_ATTR_PTR(space_info, total_bytes_pinned),

fs/btrfs/zoned.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,3 +1160,24 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
11601160

11611161
return ret;
11621162
}
1163+
1164+
void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
1165+
{
1166+
u64 unusable, free;
1167+
1168+
if (!btrfs_is_zoned(cache->fs_info))
1169+
return;
1170+
1171+
WARN_ON(cache->bytes_super != 0);
1172+
unusable = cache->alloc_offset - cache->used;
1173+
free = cache->length - cache->alloc_offset;
1174+
1175+
/* We only need ->free_space in ALLOC_SEQ block groups */
1176+
cache->last_byte_to_unpin = (u64)-1;
1177+
cache->cached = BTRFS_CACHE_FINISHED;
1178+
cache->free_space_ctl->free_space = free;
1179+
cache->zone_unusable = unusable;
1180+
1181+
/* Should not have any excluded extents. Just in case, though */
1182+
btrfs_free_excluded_extents(cache);
1183+
}

fs/btrfs/zoned.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
4242
u64 length, u64 *bytes);
4343
int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size);
4444
int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new);
45+
void btrfs_calc_zone_unusable(struct btrfs_block_group *cache);
4546
#else /* CONFIG_BLK_DEV_ZONED */
4647
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
4748
struct blk_zone *zone)
@@ -123,6 +124,8 @@ static inline int btrfs_load_block_group_zone_info(
123124
return 0;
124125
}
125126

127+
static inline void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) { }
128+
126129
#endif
127130

128131
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)

0 commit comments

Comments
 (0)