Message ID | 20241011034502.24009-1-robbieko@synology.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] btrfs: reduce lock contention when eb cache miss for btree search | expand |
On Fri, Oct 11, 2024 at 4:54 AM robbieko <robbieko@synology.com> wrote: > > From: Robbie Ko <robbieko@synology.com> > > When crawling btree, if an eb cache miss occurs, we change to use > the eb read lock and release all previous locks to reduce lock contention. > > Because we have prepared the check parameters and the read lock > of eb we hold, we can ensure that no race will occur during the check > and cause unexpected errors. So this is a bit too terse, in your reply to my questions for the v1 patch, you gave a much better explanation, that this reduces lock contention on nodes at level 1. Before this change we released locks only from level 2 and up and we read a leaf's content from disk while holding a lock on its parent (level 1), causing the unnecessary lock contention on the parent. Please include that in the change log, it makes everything a lot clear. > > v2: > * Add skip_locking handle Information about patch version and what changed doesn't belong here, it's pointless to have in the git log since the previous version isn't in git. > > Signed-off-by: Robbie Ko <robbieko@synology.com> > --- The patch version and what changed information belongs here, after the ---, which causes 'git am' to drop it. > fs/btrfs/ctree.c | 104 ++++++++++++++++++++++++++++++++--------------- > 1 file changed, 72 insertions(+), 32 deletions(-) > > diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c > index 0cc919d15b14..ad3e734f09f6 100644 > --- a/fs/btrfs/ctree.c > +++ b/fs/btrfs/ctree.c > @@ -1515,12 +1515,15 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, > struct btrfs_tree_parent_check check = { 0 }; > u64 blocknr; > u64 gen; > - struct extent_buffer *tmp; > - int ret; > + struct extent_buffer *tmp = NULL; > + int ret = 0; > int parent_level; > - bool unlock_up; > + int err; > + bool create = false; So this is better named as "read_tmp", because it's used to indicate that we are reading the tmp extent buffer's pages from disk. > + bool lock = false; And here naming it as "tmp_locked", because it's used to indicate if we have locked the tmp extent buffer. As it is named, "lock", is a bit too vague and gives the idea it means to lock something later on after some condition is met. > + bool skip_locking = p->skip_locking; This is unnecessary, we never change it and we can simply use p->skip_locking directly, as it never changes too and makes it clear it comes from the path when reading code below. > + bool path_released = false; > > - unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]); > blocknr = btrfs_node_blockptr(*eb_ret, slot); > gen = btrfs_node_ptr_generation(*eb_ret, slot); > parent_level = btrfs_header_level(*eb_ret); > @@ -1551,68 +1554,105 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, > */ > if (btrfs_verify_level_key(tmp, > parent_level - 1, &check.first_key, gen)) { > - free_extent_buffer(tmp); > - return -EUCLEAN; > + ret = -EUCLEAN; > + goto out; > } > *eb_ret = tmp; > - return 0; > + tmp = NULL; > + ret = 0; > + goto out; > } > > if (p->nowait) { > - free_extent_buffer(tmp); > - return -EAGAIN; > + ret = -EAGAIN; > + goto out; > } > > - if (unlock_up) > + if (!skip_locking) { > btrfs_unlock_up_safe(p, level + 1); > - > - /* now we're allowed to do a blocking uptodate check */ > - ret = btrfs_read_extent_buffer(tmp, &check); > - if (ret) { > - free_extent_buffer(tmp); > + lock = true; > + btrfs_tree_read_lock(tmp); > btrfs_release_path(p); > - return ret; > + ret = -EAGAIN; > + path_released = true; > } > > - if (unlock_up) > - ret = -EAGAIN; > + /* Now we're allowed to do a blocking uptodate check. */ > + err = btrfs_read_extent_buffer(tmp, &check); > + if (err) { > + ret = err; > + goto out; > + } > > + if (!ret) { To be consistent with the rest of the function's code, and because it's an integer and not a boolean, the style "ret == 0" would be more clear. > + ASSERT(!lock); > + *eb_ret = tmp; > + tmp = NULL; > + } > goto out; > } else if (p->nowait) { > - return -EAGAIN; > + ret = -EAGAIN; > + goto out; > } > > - if (unlock_up) { > + if (!skip_locking) { > btrfs_unlock_up_safe(p, level + 1); > ret = -EAGAIN; > - } else { > - ret = 0; > } > > if (p->reada != READA_NONE) > reada_for_search(fs_info, p, level, slot, key->objectid); > > - tmp = read_tree_block(fs_info, blocknr, &check); > + tmp = btrfs_find_create_tree_block(fs_info, blocknr, check.owner_root, check.level); > if (IS_ERR(tmp)) { > + ret = PTR_ERR(tmp); > + tmp = NULL; > + goto out; > + } > + create = true; > + > + if (!skip_locking) { > + ASSERT(ret); > + lock = true; > + btrfs_tree_read_lock(tmp); > btrfs_release_path(p); > - return PTR_ERR(tmp); > + path_released = true; > + } > + > + /* Now we're allowed to do a blocking uptodate check. */ > + err = btrfs_read_extent_buffer(tmp, &check); > + if (err) { > + ret = err; > + goto out; > } > + > /* > * If the read above didn't mark this buffer up to date, > * it will never end up being up to date. Set ret to EIO now > * and give up so that our caller doesn't loop forever > * on our EAGAINs. > */ > - if (!extent_buffer_uptodate(tmp)) > + if (!extent_buffer_uptodate(tmp)) { > ret = -EIO; > + goto out; > + } > > -out: > - if (ret == 0) { > + if (!ret) { And leave the original "ret == 0", it was just fine, easier, more direct to read, as it makes it clear it's an integer and not a boolean type. Otherwise it looks fine, thanks. > + ASSERT(!lock); > *eb_ret = tmp; > - } else { > - free_extent_buffer(tmp); > - btrfs_release_path(p); > + tmp = NULL; > + } > +out: > + if (tmp) { > + if (lock) > + btrfs_tree_read_unlock(tmp); > + if (create && ret && ret != -EAGAIN) > + free_extent_buffer_stale(tmp); > + else > + free_extent_buffer(tmp); > } > + if (ret && !path_released) > + btrfs_release_path(p); > > return ret; > } > @@ -2198,7 +2238,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, > } > > err = read_block_for_search(root, p, &b, level, slot, key); > - if (err == -EAGAIN) > + if (err == -EAGAIN && !p->nowait) > goto again; > if (err) { > ret = err; > @@ -2325,7 +2365,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, > } > > err = read_block_for_search(root, p, &b, level, slot, key); > - if (err == -EAGAIN) > + if (err == -EAGAIN && !p->nowait) > goto again; > if (err) { > ret = err; > -- > 2.17.1 > >
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0cc919d15b14..ad3e734f09f6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1515,12 +1515,15 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, struct btrfs_tree_parent_check check = { 0 }; u64 blocknr; u64 gen; - struct extent_buffer *tmp; - int ret; + struct extent_buffer *tmp = NULL; + int ret = 0; int parent_level; - bool unlock_up; + int err; + bool create = false; + bool lock = false; + bool skip_locking = p->skip_locking; + bool path_released = false; - unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]); blocknr = btrfs_node_blockptr(*eb_ret, slot); gen = btrfs_node_ptr_generation(*eb_ret, slot); parent_level = btrfs_header_level(*eb_ret); @@ -1551,68 +1554,105 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, */ if (btrfs_verify_level_key(tmp, parent_level - 1, &check.first_key, gen)) { - free_extent_buffer(tmp); - return -EUCLEAN; + ret = -EUCLEAN; + goto out; } *eb_ret = tmp; - return 0; + tmp = NULL; + ret = 0; + goto out; } if (p->nowait) { - free_extent_buffer(tmp); - return -EAGAIN; + ret = -EAGAIN; + goto out; } - if (unlock_up) + if (!skip_locking) { btrfs_unlock_up_safe(p, level + 1); - - /* now we're allowed to do a blocking uptodate check */ - ret = btrfs_read_extent_buffer(tmp, &check); - if (ret) { - free_extent_buffer(tmp); + lock = true; + btrfs_tree_read_lock(tmp); btrfs_release_path(p); - return ret; + ret = -EAGAIN; + path_released = true; } - if (unlock_up) - ret = -EAGAIN; + /* Now we're allowed to do a blocking uptodate check. */ + err = btrfs_read_extent_buffer(tmp, &check); + if (err) { + ret = err; + goto out; + } + if (!ret) { + ASSERT(!lock); + *eb_ret = tmp; + tmp = NULL; + } goto out; } else if (p->nowait) { - return -EAGAIN; + ret = -EAGAIN; + goto out; } - if (unlock_up) { + if (!skip_locking) { btrfs_unlock_up_safe(p, level + 1); ret = -EAGAIN; - } else { - ret = 0; } if (p->reada != READA_NONE) reada_for_search(fs_info, p, level, slot, key->objectid); - tmp = read_tree_block(fs_info, blocknr, &check); + tmp = btrfs_find_create_tree_block(fs_info, blocknr, check.owner_root, check.level); if (IS_ERR(tmp)) { + ret = PTR_ERR(tmp); + tmp = NULL; + goto out; + } + create = true; + + if (!skip_locking) { + ASSERT(ret); + lock = true; + btrfs_tree_read_lock(tmp); btrfs_release_path(p); - return PTR_ERR(tmp); + path_released = true; + } + + /* Now we're allowed to do a blocking uptodate check. */ + err = btrfs_read_extent_buffer(tmp, &check); + if (err) { + ret = err; + goto out; } + /* * If the read above didn't mark this buffer up to date, * it will never end up being up to date. Set ret to EIO now * and give up so that our caller doesn't loop forever * on our EAGAINs. */ - if (!extent_buffer_uptodate(tmp)) + if (!extent_buffer_uptodate(tmp)) { ret = -EIO; + goto out; + } -out: - if (ret == 0) { + if (!ret) { + ASSERT(!lock); *eb_ret = tmp; - } else { - free_extent_buffer(tmp); - btrfs_release_path(p); + tmp = NULL; + } +out: + if (tmp) { + if (lock) + btrfs_tree_read_unlock(tmp); + if (create && ret && ret != -EAGAIN) + free_extent_buffer_stale(tmp); + else + free_extent_buffer(tmp); } + if (ret && !path_released) + btrfs_release_path(p); return ret; } @@ -2198,7 +2238,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, } err = read_block_for_search(root, p, &b, level, slot, key); - if (err == -EAGAIN) + if (err == -EAGAIN && !p->nowait) goto again; if (err) { ret = err; @@ -2325,7 +2365,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, } err = read_block_for_search(root, p, &b, level, slot, key); - if (err == -EAGAIN) + if (err == -EAGAIN && !p->nowait) goto again; if (err) { ret = err;