Message ID | 20241015074137.26067-1-robbieko@synology.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3] btrfs: reduce lock contention when eb cache miss for btree search | expand |
On Tue, Oct 15, 2024 at 8:41 AM robbieko <robbieko@synology.com> wrote: > > From: Robbie Ko <robbieko@synology.com> > > When crawling btree, if an eb cache miss occurs, we change to use > the eb read lock and release all previous locks (include parent lock) > to reduce lock contention. > > If a eb cache miss occurs in a leaf and needs to execute IO, before > this change we released locks only from level 2 and up and we > read a leaf's content from disk while holding a lock on its parent > (level 1), causing the unnecessary lock contention on the parent, > after this change we release locks from level 1 and up, but we lock > level 0, and read leaf's content from disk. > > Because we have prepared the check parameters and the read lock > of eb we hold, we can ensure that no race will occur during the check > and cause unexpected errors. > > Signed-off-by: Robbie Ko <robbieko@synology.com> > --- > v3: Improve the change log, and change variable named > v2: Add skip_locking handle > fs/btrfs/ctree.c | 101 ++++++++++++++++++++++++++++++++--------------- > 1 file changed, 70 insertions(+), 31 deletions(-) > > diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c > index 0cc919d15b14..dc1d4e05214a 100644 > --- a/fs/btrfs/ctree.c > +++ b/fs/btrfs/ctree.c > @@ -1515,12 +1515,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, > struct btrfs_tree_parent_check check = { 0 }; > u64 blocknr; > u64 gen; > - struct extent_buffer *tmp; > - int ret; > + struct extent_buffer *tmp = NULL; > + int ret = 0; > int parent_level; > - bool unlock_up; > + int err; > + bool read_tmp = false; > + bool tmp_locked = false; > + bool path_released = false; > > - unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]); > blocknr = btrfs_node_blockptr(*eb_ret, slot); > gen = btrfs_node_ptr_generation(*eb_ret, slot); > parent_level = btrfs_header_level(*eb_ret); > @@ -1551,68 +1553,105 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, > */ > if (btrfs_verify_level_key(tmp, > parent_level - 1, &check.first_key, gen)) { > - free_extent_buffer(tmp); > - return -EUCLEAN; > + ret = -EUCLEAN; > + goto out; > } > *eb_ret = tmp; > - return 0; > + tmp = NULL; > + ret = 0; > + goto out; > } > > if (p->nowait) { > - free_extent_buffer(tmp); > - return -EAGAIN; > + ret = -EAGAIN; > + goto out; > } > > - if (unlock_up) > + if (!p->skip_locking) { > btrfs_unlock_up_safe(p, level + 1); > - > - /* now we're allowed to do a blocking uptodate check */ > - ret = btrfs_read_extent_buffer(tmp, &check); > - if (ret) { > - free_extent_buffer(tmp); > + tmp_locked = true; > + btrfs_tree_read_lock(tmp); > btrfs_release_path(p); > - return ret; > + ret = -EAGAIN; > + path_released = true; > } > > - if (unlock_up) > - ret = -EAGAIN; > + /* Now we're allowed to do a blocking uptodate check. */ > + err = btrfs_read_extent_buffer(tmp, &check); > + if (err) { > + ret = err; > + goto out; > + } > > + if (ret == 0) { > + ASSERT(!tmp_locked); > + *eb_ret = tmp; > + tmp = NULL; > + } > goto out; > } else if (p->nowait) { > - return -EAGAIN; > + ret = -EAGAIN; > + goto out; > } > > - if (unlock_up) { > + if (!p->skip_locking) { > btrfs_unlock_up_safe(p, level + 1); > ret = -EAGAIN; > - } else { > - ret = 0; > } > > if (p->reada != READA_NONE) > reada_for_search(fs_info, p, level, slot, key->objectid); > > - tmp = read_tree_block(fs_info, blocknr, &check); > + tmp = btrfs_find_create_tree_block(fs_info, blocknr, check.owner_root, check.level); > if (IS_ERR(tmp)) { > + ret = PTR_ERR(tmp); > + tmp = NULL; > + goto out; > + } > + read_tmp = true; > + > + if (!p->skip_locking) { > + ASSERT(ret); We can be more explicit here and do instead: ASSERT(ret == -EAGAIN); The patch looks good to me, and I'll soon add it to the for-next branch, after some testing, with that small change if you're ok with it. Reviewed-by: Filipe Manana <fdmanana@suse.com> Thanks. > + tmp_locked = true; > + btrfs_tree_read_lock(tmp); > btrfs_release_path(p); > - return PTR_ERR(tmp); > + path_released = true; > + } > + > + /* Now we're allowed to do a blocking uptodate check. */ > + err = btrfs_read_extent_buffer(tmp, &check); > + if (err) { > + ret = err; > + goto out; > } > + > /* > * If the read above didn't mark this buffer up to date, > * it will never end up being up to date. Set ret to EIO now > * and give up so that our caller doesn't loop forever > * on our EAGAINs. > */ > - if (!extent_buffer_uptodate(tmp)) > + if (!extent_buffer_uptodate(tmp)) { > ret = -EIO; > + goto out; > + } > > -out: > if (ret == 0) { > + ASSERT(!tmp_locked); > *eb_ret = tmp; > - } else { > - free_extent_buffer(tmp); > - btrfs_release_path(p); > + tmp = NULL; > + } > +out: > + if (tmp) { > + if (tmp_locked) > + btrfs_tree_read_unlock(tmp); > + if (read_tmp && ret && ret != -EAGAIN) > + free_extent_buffer_stale(tmp); > + else > + free_extent_buffer(tmp); > } > + if (ret && !path_released) > + btrfs_release_path(p); > > return ret; > } > @@ -2198,7 +2237,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, > } > > err = read_block_for_search(root, p, &b, level, slot, key); > - if (err == -EAGAIN) > + if (err == -EAGAIN && !p->nowait) > goto again; > if (err) { > ret = err; > @@ -2325,7 +2364,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, > } > > err = read_block_for_search(root, p, &b, level, slot, key); > - if (err == -EAGAIN) > + if (err == -EAGAIN && !p->nowait) > goto again; > if (err) { > ret = err; > -- > 2.17.1 > >
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0cc919d15b14..dc1d4e05214a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1515,12 +1515,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, struct btrfs_tree_parent_check check = { 0 }; u64 blocknr; u64 gen; - struct extent_buffer *tmp; - int ret; + struct extent_buffer *tmp = NULL; + int ret = 0; int parent_level; - bool unlock_up; + int err; + bool read_tmp = false; + bool tmp_locked = false; + bool path_released = false; - unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]); blocknr = btrfs_node_blockptr(*eb_ret, slot); gen = btrfs_node_ptr_generation(*eb_ret, slot); parent_level = btrfs_header_level(*eb_ret); @@ -1551,68 +1553,105 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, */ if (btrfs_verify_level_key(tmp, parent_level - 1, &check.first_key, gen)) { - free_extent_buffer(tmp); - return -EUCLEAN; + ret = -EUCLEAN; + goto out; } *eb_ret = tmp; - return 0; + tmp = NULL; + ret = 0; + goto out; } if (p->nowait) { - free_extent_buffer(tmp); - return -EAGAIN; + ret = -EAGAIN; + goto out; } - if (unlock_up) + if (!p->skip_locking) { btrfs_unlock_up_safe(p, level + 1); - - /* now we're allowed to do a blocking uptodate check */ - ret = btrfs_read_extent_buffer(tmp, &check); - if (ret) { - free_extent_buffer(tmp); + tmp_locked = true; + btrfs_tree_read_lock(tmp); btrfs_release_path(p); - return ret; + ret = -EAGAIN; + path_released = true; } - if (unlock_up) - ret = -EAGAIN; + /* Now we're allowed to do a blocking uptodate check. */ + err = btrfs_read_extent_buffer(tmp, &check); + if (err) { + ret = err; + goto out; + } + if (ret == 0) { + ASSERT(!tmp_locked); + *eb_ret = tmp; + tmp = NULL; + } goto out; } else if (p->nowait) { - return -EAGAIN; + ret = -EAGAIN; + goto out; } - if (unlock_up) { + if (!p->skip_locking) { btrfs_unlock_up_safe(p, level + 1); ret = -EAGAIN; - } else { - ret = 0; } if (p->reada != READA_NONE) reada_for_search(fs_info, p, level, slot, key->objectid); - tmp = read_tree_block(fs_info, blocknr, &check); + tmp = btrfs_find_create_tree_block(fs_info, blocknr, check.owner_root, check.level); if (IS_ERR(tmp)) { + ret = PTR_ERR(tmp); + tmp = NULL; + goto out; + } + read_tmp = true; + + if (!p->skip_locking) { + ASSERT(ret); + tmp_locked = true; + btrfs_tree_read_lock(tmp); btrfs_release_path(p); - return PTR_ERR(tmp); + path_released = true; + } + + /* Now we're allowed to do a blocking uptodate check. */ + err = btrfs_read_extent_buffer(tmp, &check); + if (err) { + ret = err; + goto out; } + /* * If the read above didn't mark this buffer up to date, * it will never end up being up to date. Set ret to EIO now * and give up so that our caller doesn't loop forever * on our EAGAINs. */ - if (!extent_buffer_uptodate(tmp)) + if (!extent_buffer_uptodate(tmp)) { ret = -EIO; + goto out; + } -out: if (ret == 0) { + ASSERT(!tmp_locked); *eb_ret = tmp; - } else { - free_extent_buffer(tmp); - btrfs_release_path(p); + tmp = NULL; + } +out: + if (tmp) { + if (tmp_locked) + btrfs_tree_read_unlock(tmp); + if (read_tmp && ret && ret != -EAGAIN) + free_extent_buffer_stale(tmp); + else + free_extent_buffer(tmp); } + if (ret && !path_released) + btrfs_release_path(p); return ret; } @@ -2198,7 +2237,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, } err = read_block_for_search(root, p, &b, level, slot, key); - if (err == -EAGAIN) + if (err == -EAGAIN && !p->nowait) goto again; if (err) { ret = err; @@ -2325,7 +2364,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, } err = read_block_for_search(root, p, &b, level, slot, key); - if (err == -EAGAIN) + if (err == -EAGAIN && !p->nowait) goto again; if (err) { ret = err;