Skip to content

Commit

Permalink
btrfs: Validate child tree block's level and first key
Browse files Browse the repository at this point in the history
We have several reports of node pointers pointing to incorrect child
tree blocks, which may even have the wrong owner and level but still have a
valid generation and checksum.

Although btrfs check could handle it and print error message like:
leaf parent key incorrect 60670574592

The kernel doesn't have enough checks for this type of corruption.
At least add such a check to read_tree_block() and btrfs_read_buffer(),
where we need two new parameters @level and @first_key to verify the
child tree block.

The new @level check is mandatory and all call sites are already
modified to extract the expected level from their call chain.

While @first_key is optional, the following call sites are skipping such
check:
1) Root node/leaf
   As ROOT_ITEM doesn't contain the first key, skip @first_key check.
2) Direct backref
   Only the parent bytenr and level are known and we need to resolve the key
   all by ourselves, skip @first_key check.

Another note about this verification: it needs extra info from the nodeptr
or ROOT_ITEM, so it can't fit into the current tree-checker framework, which
is limited to node/leaf boundaries.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
  • Loading branch information
adam900710 authored and kdave committed Mar 31, 2018
1 parent 3c0efdf commit 581c176
Show file tree
Hide file tree
Showing 10 changed files with 170 additions and 46 deletions.
6 changes: 4 additions & 2 deletions fs/btrfs/backref.c
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,8 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
BUG_ON(ref->key_for_search.type);
BUG_ON(!ref->wanted_disk_byte);

eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0);
eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0,
ref->level - 1, NULL);
if (IS_ERR(eb)) {
free_pref(ref);
return PTR_ERR(eb);
Expand Down Expand Up @@ -1288,7 +1289,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
ref->level == 0) {
struct extent_buffer *eb;

eb = read_tree_block(fs_info, ref->parent, 0);
eb = read_tree_block(fs_info, ref->parent, 0,
ref->level, NULL);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
goto out;
Expand Down
28 changes: 22 additions & 6 deletions fs/btrfs/ctree.c
Original file line number Diff line number Diff line change
Expand Up @@ -1354,6 +1354,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
struct tree_mod_root *old_root = NULL;
u64 old_generation = 0;
u64 logical;
int level;

eb_root = btrfs_read_lock_root_node(root);
tm = __tree_mod_log_oldest_root(eb_root, time_seq);
Expand All @@ -1364,15 +1365,17 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
old_root = &tm->old_root;
old_generation = tm->generation;
logical = old_root->logical;
level = old_root->level;
} else {
logical = eb_root->start;
level = btrfs_header_level(eb_root);
}

tm = tree_mod_log_search(fs_info, logical, time_seq);
if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
btrfs_tree_read_unlock(eb_root);
free_extent_buffer(eb_root);
old = read_tree_block(fs_info, logical, 0);
old = read_tree_block(fs_info, logical, 0, level, NULL);
if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
if (!IS_ERR(old))
free_extent_buffer(old);
Expand Down Expand Up @@ -1592,6 +1595,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
btrfs_set_lock_blocking(parent);

for (i = start_slot; i <= end_slot; i++) {
struct btrfs_key first_key;
int close = 1;

btrfs_node_key(parent, &disk_key, i);
Expand All @@ -1601,6 +1605,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
progress_passed = 1;
blocknr = btrfs_node_blockptr(parent, i);
gen = btrfs_node_ptr_generation(parent, i);
btrfs_node_key_to_cpu(parent, &first_key, i);
if (last_block == 0)
last_block = blocknr;

Expand All @@ -1624,15 +1629,18 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
uptodate = 0;
if (!cur || !uptodate) {
if (!cur) {
cur = read_tree_block(fs_info, blocknr, gen);
cur = read_tree_block(fs_info, blocknr, gen,
parent_level - 1,
&first_key);
if (IS_ERR(cur)) {
return PTR_ERR(cur);
} else if (!extent_buffer_uptodate(cur)) {
free_extent_buffer(cur);
return -EIO;
}
} else if (!uptodate) {
err = btrfs_read_buffer(cur, gen);
err = btrfs_read_buffer(cur, gen,
parent_level - 1,&first_key);
if (err) {
free_extent_buffer(cur);
return err;
Expand Down Expand Up @@ -1785,14 +1793,17 @@ read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent,
{
int level = btrfs_header_level(parent);
struct extent_buffer *eb;
struct btrfs_key first_key;

if (slot < 0 || slot >= btrfs_header_nritems(parent))
return ERR_PTR(-ENOENT);

BUG_ON(level == 0);

btrfs_node_key_to_cpu(parent, &first_key, slot);
eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot),
btrfs_node_ptr_generation(parent, slot));
btrfs_node_ptr_generation(parent, slot),
level - 1, &first_key);
if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
eb = ERR_PTR(-EIO);
Expand Down Expand Up @@ -2388,10 +2399,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
u64 gen;
struct extent_buffer *b = *eb_ret;
struct extent_buffer *tmp;
struct btrfs_key first_key;
int ret;
int parent_level;

blocknr = btrfs_node_blockptr(b, slot);
gen = btrfs_node_ptr_generation(b, slot);
parent_level = btrfs_header_level(b);
btrfs_node_key_to_cpu(b, &first_key, slot);

tmp = find_extent_buffer(fs_info, blocknr);
if (tmp) {
Expand All @@ -2410,7 +2425,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
btrfs_set_path_blocking(p);

/* now we're allowed to do a blocking uptodate check */
ret = btrfs_read_buffer(tmp, gen);
ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
if (!ret) {
*eb_ret = tmp;
return 0;
Expand All @@ -2437,7 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
btrfs_release_path(p);

ret = -EAGAIN;
tmp = read_tree_block(fs_info, blocknr, 0);
tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1,
&first_key);
if (!IS_ERR(tmp)) {
/*
* If the read above didn't mark this buffer up to date,
Expand Down
95 changes: 82 additions & 13 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,13 +427,59 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
return ret;
}

static int verify_level_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int level,
struct btrfs_key *first_key)
{
int found_level;
struct btrfs_key found_key;
int ret;

found_level = btrfs_header_level(eb);
if (found_level != level) {
#ifdef CONFIG_BTRFS_DEBUG
WARN_ON(1);
btrfs_err(fs_info,
"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
eb->start, level, found_level);
#endif
return -EIO;
}

if (!first_key)
return 0;

if (found_level)
btrfs_node_key_to_cpu(eb, &found_key, 0);
else
btrfs_item_key_to_cpu(eb, &found_key, 0);
ret = btrfs_comp_cpu_keys(first_key, &found_key);

#ifdef CONFIG_BTRFS_DEBUG
if (ret) {
WARN_ON(1);
btrfs_err(fs_info,
"tree first key mismatch detected, bytenr=%llu key expected=(%llu, %u, %llu) has=(%llu, %u, %llu)",
eb->start, first_key->objectid, first_key->type,
first_key->offset, found_key.objectid,
found_key.type, found_key.offset);
}
#endif
return ret;
}

/*
* helper to read a given tree block, doing retries as required when
* the checksums don't match and we have alternate mirrors to try.
*
* @parent_transid: expected transid, skip check if 0
* @level: expected level, mandatory check
* @first_key: expected key of first slot, skip check if NULL
*/
static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb,
u64 parent_transid)
u64 parent_transid, int level,
struct btrfs_key *first_key)
{
struct extent_io_tree *io_tree;
int failed = 0;
Expand All @@ -448,19 +494,23 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
mirror_num);
if (!ret) {
if (!verify_parent_transid(io_tree, eb,
if (verify_parent_transid(io_tree, eb,
parent_transid, 0))
break;
else
ret = -EIO;
else if (verify_level_key(fs_info, eb, level,
first_key))
ret = -EUCLEAN;
else
break;
}

/*
* This buffer's crc is fine, but its contents are corrupted, so
* there is no reason to read the other copies, they won't be
* any less wrong.
*/
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) ||
ret == -EUCLEAN)
break;

num_copies = btrfs_num_copies(fs_info,
Expand Down Expand Up @@ -1049,8 +1099,17 @@ void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
buf->start, buf->start + buf->len - 1);
}

/*
* Read tree block at logical address @bytenr and do various basic but critical
* verifications.
*
* @parent_transid: expected transid of this tree block, skip check if 0
* @level: expected level, mandatory check
* @first_key: expected key in slot 0, skip check if NULL
*/
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 parent_transid)
u64 parent_transid, int level,
struct btrfs_key *first_key)
{
struct extent_buffer *buf = NULL;
int ret;
Expand All @@ -1059,7 +1118,8 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
if (IS_ERR(buf))
return buf;

ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid);
ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
level, first_key);
if (ret) {
free_extent_buffer(buf);
return ERR_PTR(ret);
Expand Down Expand Up @@ -1388,6 +1448,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
struct btrfs_path *path;
u64 generation;
int ret;
int level;

path = btrfs_alloc_path();
if (!path)
Expand All @@ -1410,9 +1471,10 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
}

generation = btrfs_root_generation(&root->root_item);
level = btrfs_root_level(&root->root_item);
root->node = read_tree_block(fs_info,
btrfs_root_bytenr(&root->root_item),
generation);
generation, level, NULL);
if (IS_ERR(root->node)) {
ret = PTR_ERR(root->node);
goto find_fail;
Expand Down Expand Up @@ -2261,6 +2323,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
struct btrfs_root *log_tree_root;
struct btrfs_super_block *disk_super = fs_info->super_copy;
u64 bytenr = btrfs_super_log_root(disk_super);
int level = btrfs_super_log_root_level(disk_super);

if (fs_devices->rw_devices == 0) {
btrfs_warn(fs_info, "log replay required on RO media");
Expand All @@ -2274,7 +2337,8 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
__setup_root(log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);

log_tree_root->node = read_tree_block(fs_info, bytenr,
fs_info->generation + 1);
fs_info->generation + 1,
level, NULL);
if (IS_ERR(log_tree_root->node)) {
btrfs_warn(fs_info, "failed to read log tree");
ret = PTR_ERR(log_tree_root->node);
Expand Down Expand Up @@ -2390,6 +2454,7 @@ int open_ctree(struct super_block *sb,
int num_backups_tried = 0;
int backup_index = 0;
int clear_free_space_tree = 0;
int level;

tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
Expand Down Expand Up @@ -2725,12 +2790,13 @@ int open_ctree(struct super_block *sb,
}

generation = btrfs_super_chunk_root_generation(disk_super);
level = btrfs_super_chunk_root_level(disk_super);

__setup_root(chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);

chunk_root->node = read_tree_block(fs_info,
btrfs_super_chunk_root(disk_super),
generation);
generation, level, NULL);
if (IS_ERR(chunk_root->node) ||
!extent_buffer_uptodate(chunk_root->node)) {
btrfs_err(fs_info, "failed to read chunk root");
Expand Down Expand Up @@ -2764,10 +2830,11 @@ int open_ctree(struct super_block *sb,

retry_root_backup:
generation = btrfs_super_generation(disk_super);
level = btrfs_super_root_level(disk_super);

tree_root->node = read_tree_block(fs_info,
btrfs_super_root(disk_super),
generation);
generation, level, NULL);
if (IS_ERR(tree_root->node) ||
!extent_buffer_uptodate(tree_root->node)) {
btrfs_warn(fs_info, "failed to read tree root");
Expand Down Expand Up @@ -3887,12 +3954,14 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
__btrfs_btree_balance_dirty(fs_info, 0);
}

int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
struct btrfs_key *first_key)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
struct btrfs_fs_info *fs_info = root->fs_info;

return btree_read_extent_buffer_pages(fs_info, buf, parent_transid);
return btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
level, first_key);
}

static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
Expand Down
8 changes: 5 additions & 3 deletions fs/btrfs/disk-io.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ static inline u64 btrfs_sb_offset(int mirror)
struct btrfs_device;
struct btrfs_fs_devices;

struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 parent_transid);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 parent_transid, int level,
struct btrfs_key *first_key);
void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr);
int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
int mirror_num, struct extent_buffer **eb);
Expand Down Expand Up @@ -123,7 +124,8 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root)
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
struct btrfs_key *first_key);
u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, u8 *result);
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
Expand Down
6 changes: 5 additions & 1 deletion fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -8710,6 +8710,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
u64 parent;
u32 blocksize;
struct btrfs_key key;
struct btrfs_key first_key;
struct extent_buffer *next;
int level = wc->level;
int reada = 0;
Expand All @@ -8730,6 +8731,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
}

bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
btrfs_node_key_to_cpu(path->nodes[level], &first_key,
path->slots[level]);
blocksize = fs_info->nodesize;

next = find_extent_buffer(fs_info, bytenr);
Expand Down Expand Up @@ -8794,7 +8797,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
if (!next) {
if (reada && level == 1)
reada_walk_down(trans, root, wc, path);
next = read_tree_block(fs_info, bytenr, generation);
next = read_tree_block(fs_info, bytenr, generation, level - 1,
&first_key);
if (IS_ERR(next)) {
return PTR_ERR(next);
} else if (!extent_buffer_uptodate(next)) {
Expand Down
Loading

0 comments on commit 581c176

Please sign in to comment.