Skip to content

Commit fa1a0f4

Browse files
naota authored and kdave committed
btrfs: zoned: serialize log transaction on zoned filesystems
This is patch 2/3 to enable tree-log on zoned filesystems. Since we can start more than one log transaction per subvolume simultaneously, nodes from multiple transactions can be allocated interleaved. Such mixed allocation results in non-sequential writes at the time of a log transaction commit. The nodes of the global log root tree (fs_info->log_root_tree) also have the same problem with mixed allocation. Serialize log transactions by waiting for a committing transaction when someone tries to start a new transaction, to avoid the mixed allocation problem. We must also wait for running log transactions from another subvolume, but there is no easy way to detect which subvolume root is running a log transaction. So, this patch forbids starting a new log transaction when other subvolumes have already allocated the global log root tree. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 40ab3be commit fa1a0f4

File tree

1 file changed

+33
-1
lines changed

1 file changed

+33
-1
lines changed

fs/btrfs/tree-log.c

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
105105
struct btrfs_root *log,
106106
struct btrfs_path *path,
107107
u64 dirid, int del_all);
108+
static void wait_log_commit(struct btrfs_root *root, int transid);
108109

109110
/*
110111
* tree logging is a special write ahead log used to make sure that
@@ -140,7 +141,9 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
140141
{
141142
struct btrfs_fs_info *fs_info = root->fs_info;
142143
struct btrfs_root *tree_root = fs_info->tree_root;
144+
const bool zoned = btrfs_is_zoned(fs_info);
143145
int ret = 0;
146+
bool created = false;
144147

145148
/*
146149
* First check if the log root tree was already created. If not, create
@@ -150,8 +153,10 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
150153
mutex_lock(&tree_root->log_mutex);
151154
if (!fs_info->log_root_tree) {
152155
ret = btrfs_init_log_root_tree(trans, fs_info);
153-
if (!ret)
156+
if (!ret) {
154157
set_bit(BTRFS_ROOT_HAS_LOG_TREE, &tree_root->state);
158+
created = true;
159+
}
155160
}
156161
mutex_unlock(&tree_root->log_mutex);
157162
if (ret)
@@ -160,19 +165,38 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
160165

161166
mutex_lock(&root->log_mutex);
162167

168+
again:
163169
if (root->log_root) {
170+
int index = (root->log_transid + 1) % 2;
171+
164172
if (btrfs_need_log_full_commit(trans)) {
165173
ret = -EAGAIN;
166174
goto out;
167175
}
168176

177+
if (zoned && atomic_read(&root->log_commit[index])) {
178+
wait_log_commit(root, root->log_transid - 1);
179+
goto again;
180+
}
181+
169182
if (!root->log_start_pid) {
170183
clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
171184
root->log_start_pid = current->pid;
172185
} else if (root->log_start_pid != current->pid) {
173186
set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
174187
}
175188
} else {
189+
/*
190+
* This means fs_info->log_root_tree was already created
191+
* for some other FS trees. Force a full commit so that nodes
192+
* from multiple log transactions are not mixed, which keeps
193+
* the writes sequential.
194+
*/
195+
if (zoned && !created) {
196+
ret = -EAGAIN;
197+
goto out;
198+
}
199+
176200
ret = btrfs_add_log_tree(trans, root);
177201
if (ret)
178202
goto out;
@@ -201,14 +225,22 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
201225
*/
202226
static int join_running_log_trans(struct btrfs_root *root)
203227
{
228+
const bool zoned = btrfs_is_zoned(root->fs_info);
204229
int ret = -ENOENT;
205230

206231
if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state))
207232
return ret;
208233

209234
mutex_lock(&root->log_mutex);
235+
again:
210236
if (root->log_root) {
237+
int index = (root->log_transid + 1) % 2;
238+
211239
ret = 0;
240+
if (zoned && atomic_read(&root->log_commit[index])) {
241+
wait_log_commit(root, root->log_transid - 1);
242+
goto again;
243+
}
212244
atomic_inc(&root->log_writers);
213245
}
214246
mutex_unlock(&root->log_mutex);

0 commit comments

Comments
 (0)