Message ID | 412df27129a676f4bca6724960350569573a3791.1623567940.git.rgoldwyn@suse.com |
---|---|
State | New, archived |
Series | btrfs buffered iomap support |
On 13.06.21 16:39, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
>
> Lock Order change: Extent locks before page locks
>
> writepage() is called with the page locked. So make an attempt
> to lock the extents and proceed only if successful.
>
> The new function best_effort_lock_extent() attempts to lock the range
> provided.
>
> If the entire range was not locked, but it still covers the locked
> page, work with the reduced range by resetting delalloc_end()
>
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>  fs/btrfs/extent_io.c | 66 ++++++++++++++++++++++++++++++++++----------
>  1 file changed, 51 insertions(+), 15 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 9e81d25dea70..75ad809e8c86 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -1488,6 +1488,47 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
>  	return 1;
>  }
>
> +/*
> + * best_effort_lock_extent() - locks the extent to the best of effort
> + * making sure the minimum range is locked and the
> + * delalloc bits are set. If the full requested range is not locked,
> + * delalloc_end shifts to until the range that can be locked.
> + */
> +static bool best_effort_lock_extent(struct extent_io_tree *tree, u64 start,
> +		u64 *delalloc_end, u64 min_end, struct extent_state **cached_state)
> +{
> +	u64 failed_start;
> +	u64 end = *delalloc_end;
> +	int ret;
> +
> +	ret = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
> +			&failed_start, cached_state, GFP_NOFS, NULL);
> +
> +	if (!ret)
> +		return false;

The locking sequence here seems broken. Based on the behavior of
lock_extent_bits():

1. ret == 0 means the range was successfully locked, but instead you
   return false.
2. ret can be a number of errors, including -EEXIST; the original code
   basically waits for the extents to be unlocked and retries.
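To illustrate, roughly the kind of return value handling I would expect
here (an untested sketch; it assumes set_extent_bit() keeps the same
convention as lock_extent_bits()/try_lock_extent(), i.e. it returns 0 when
the whole range got locked and -EEXIST with failed_start set when it ran
into an already locked region, having locked [start, failed_start) on the
way):

	ret = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
			     &failed_start, cached_state, GFP_NOFS, NULL);
	if (ret == 0) {
		/* Whole [start, end] range is locked, nothing to trim. */
		failed_start = end + 1;
	} else if (ret == -EEXIST) {
		/*
		 * Hit an already locked region: only [start, failed_start)
		 * is locked now, so let the min_end/trimming checks below
		 * decide whether that still covers the locked page.
		 */
	} else {
		/* Some other error: give up on the best-effort lock. */
		return false;
	}

Setting failed_start past end in the success case is only there so the
existing min_end/trimming checks below can stay as they are.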
> +
> +	if (failed_start < min_end) {
> +		/* Could not lock the whole range */
> +		if (failed_start > start)
> +			unlock_extent_cached(tree, start,
> +					failed_start - 1, cached_state);
> +		return false;
> +	} else if (end > failed_start) {
> +		/* Work with what could be locked */
> +		end = failed_start - 1;

nit: Here, instead of assigning *delalloc_end to end and doing the check, I
think it's more straightforward to perform the check with failed_start and
*delalloc_end, and only then assign failed_start - 1 to end.

> +	}
> +
> +	/* Check if delalloc bits are set */
> +	ret = test_range_bit(tree, start, end,
> +			EXTENT_DELALLOC, 1, *cached_state);
> +	if (!ret) {
> +		unlock_extent_cached(tree, start, end - 1, cached_state);
> +		return false;
> +	}
> +	*delalloc_end = end;
> +	return true;
> +}
> +
>  void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
>  {
>  	unsigned long index = start >> PAGE_SHIFT;
> @@ -2018,7 +2059,16 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
>  	if (delalloc_end + 1 - delalloc_start > max_bytes)
>  		delalloc_end = delalloc_start + max_bytes - 1;
>
> -	/* step two, lock all the pages after the page that has start */
> +	/* step two, lock the state bits for the range */
> +	found = best_effort_lock_extent(tree, delalloc_start, &delalloc_end,
> +			((page_index(locked_page) + 1) << PAGE_SHIFT),

page_index(locked_page) + 1 means you'd like to ensure you have locked at
least 1 page beyond the locked page, correct? This looks a bit arbitrary,
i.e. why 1 page and not 2, for example?

> +			&cached_state);
> +	if (!found) {
> +		free_extent_state(cached_state);
> +		goto out_failed;
> +	}
> +
> +	/* step three, lock all the pages after the page that has start */
>  	ret = lock_delalloc_pages(inode, locked_page,
>  				  delalloc_start, delalloc_end);

So now that this has become step 3 and the extents are locked, you should
implement relevant error handling which unlocks the extents in case of an
error (see the sketch at the end of this mail).

>  	ASSERT(!ret || ret == -EAGAIN);
> @@ -2038,20 +2088,6 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
>  		}
>  	}
>
> -	/* step three, lock the state bits for the whole range */
> -	lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
> -
> -	/* then test to make sure it is all still delalloc */
> -	ret = test_range_bit(tree, delalloc_start, delalloc_end,
> -			     EXTENT_DELALLOC, 1, cached_state);
> -	if (!ret) {
> -		unlock_extent_cached(tree, delalloc_start, delalloc_end,
> -				     &cached_state);
> -		__unlock_for_delalloc(inode, locked_page,
> -				      delalloc_start, delalloc_end);
> -		cond_resched();
> -		goto again;
> -	}
>  	free_extent_state(cached_state);
>  	*start = delalloc_start;
>  	*end = delalloc_end;
>
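Coming back to the min_end argument above: as far as I can tell,
((page_index(locked_page) + 1) << PAGE_SHIFT) is simply the file offset of
the first byte after locked_page, e.g. with 4K pages:

	page_index(locked_page) == 3  ->  locked_page covers bytes 12288..16383
	min_end = (3 + 1) << 12       ==  16384, the first byte after locked_page

so the failed_start < min_end check only demands that the locked extent
range covers locked_page itself, not a full page beyond it. If that is the
intent, it may be worth spelling out in the comment above
best_effort_lock_extent().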
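And on the missing error handling after lock_delalloc_pages(): roughly
what I have in mind is the below (untested sketch; delalloc_start,
delalloc_end and cached_state are the values already used for the step two
extent lock, and how this folds into the existing -EAGAIN handling, which
is not visible in the hunk above, still needs to be worked out):

	ret = lock_delalloc_pages(inode, locked_page,
				  delalloc_start, delalloc_end);
	ASSERT(!ret || ret == -EAGAIN);
	if (ret == -EAGAIN) {
		/*
		 * Page locking failed, so the extent range locked in step
		 * two must be unlocked again, otherwise it stays locked
		 * behind our back.
		 */
		unlock_extent_cached(tree, delalloc_start, delalloc_end,
				     &cached_state);
		/* ... existing -EAGAIN handling follows ... */
	}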