Message ID | 28cb7dbc0216d2a5f55efd296113f9f9576dda41.1668530684.git.rgoldwyn@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Lock extents before pages | expand |
On Tue, Nov 15, 2022 at 12:00:24PM -0600, Goldwyn Rodrigues wrote: > writepages() locks the extents in find_lock_delalloc_range() and unlocks > using clear_bit EXTENT_LOCKED operations is cow/delalloc operations. > > Call extent locking/unlocking around writepages() sequence as opposed to > while performing delayed allocation. > > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> > --- > fs/btrfs/extent_io.c | 5 ----- > fs/btrfs/inode.c | 43 +++++++++++++++++++++++++++++++------------ > 2 files changed, 31 insertions(+), 17 deletions(-) > > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c > index 92068e4ff9c3..42bae149f923 100644 > --- a/fs/btrfs/extent_io.c > +++ b/fs/btrfs/extent_io.c > @@ -464,15 +464,10 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode, > } > } > > - /* step three, lock the state bits for the whole range */ > - lock_extent(tree, delalloc_start, delalloc_end, &cached_state); > - > /* then test to make sure it is all still delalloc */ > ret = test_range_bit(tree, delalloc_start, delalloc_end, > EXTENT_DELALLOC, 1, cached_state); > if (!ret) { > - unlock_extent(tree, delalloc_start, delalloc_end, > - &cached_state); > __unlock_for_delalloc(inode, locked_page, > delalloc_start, delalloc_end); > cond_resched(); > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 4bfa51871ddc..92726831dd5d 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -992,7 +992,6 @@ static int submit_one_async_extent(struct btrfs_inode *inode, > struct async_extent *async_extent, > u64 *alloc_hint) > { > - struct extent_io_tree *io_tree = &inode->io_tree; > struct btrfs_root *root = inode->root; > struct btrfs_fs_info *fs_info = root->fs_info; > struct btrfs_key ins; > @@ -1013,7 +1012,6 @@ static int submit_one_async_extent(struct btrfs_inode *inode, > if (!(start >= locked_page_end || end <= locked_page_start)) > locked_page = async_chunk->locked_page; > } > - lock_extent(io_tree, start, end, NULL); > > /* We have fall back to uncompressed write */ > if (!async_extent->pages) > @@ -1067,7 +1065,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode, > > /* Clear dirty, set writeback and unlock the pages. */ > extent_clear_unlock_delalloc(inode, start, end, > - NULL, EXTENT_LOCKED | EXTENT_DELALLOC, > + NULL, EXTENT_DELALLOC, > PAGE_UNLOCK | PAGE_START_WRITEBACK); > if (btrfs_submit_compressed_write(inode, start, /* file_offset */ > async_extent->ram_size, /* num_bytes */ > @@ -1095,7 +1093,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode, > btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); > out_free: > extent_clear_unlock_delalloc(inode, start, end, > - NULL, EXTENT_LOCKED | EXTENT_DELALLOC | > + NULL, EXTENT_DELALLOC | > EXTENT_DELALLOC_NEW | > EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, > PAGE_UNLOCK | PAGE_START_WRITEBACK | > @@ -1263,7 +1261,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, > */ > extent_clear_unlock_delalloc(inode, start, end, > locked_page, > - EXTENT_LOCKED | EXTENT_DELALLOC | > + EXTENT_DELALLOC | > EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | > EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | > PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); > @@ -1374,7 +1372,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, > > extent_clear_unlock_delalloc(inode, start, start + ram_size - 1, > locked_page, > - EXTENT_LOCKED | EXTENT_DELALLOC, > + EXTENT_DELALLOC, > page_ops); > if (num_bytes < cur_alloc_size) > num_bytes = 0; > @@ -1425,7 +1423,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, > * We process each region below. > */ > > - clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | > + clear_bits = EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | > EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV; > page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK; > > @@ -1575,7 +1573,7 @@ static int cow_file_range_async(struct btrfs_inode *inode, > memalloc_nofs_restore(nofs_flag); > > if (!ctx) { > - unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | > + unsigned clear_bits = EXTENT_DELALLOC | > EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | > EXTENT_DO_ACCOUNTING; > unsigned long page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | > @@ -1955,7 +1953,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, > path = btrfs_alloc_path(); > if (!path) { > extent_clear_unlock_delalloc(inode, start, end, locked_page, > - EXTENT_LOCKED | EXTENT_DELALLOC | > + EXTENT_DELALLOC | > EXTENT_DO_ACCOUNTING | > EXTENT_DEFRAG, PAGE_UNLOCK | > PAGE_START_WRITEBACK | > @@ -2169,7 +2167,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, > nocow_args.num_bytes); > > extent_clear_unlock_delalloc(inode, cur_offset, nocow_end, > - locked_page, EXTENT_LOCKED | > + locked_page, > EXTENT_DELALLOC | > EXTENT_CLEAR_DATA_RESV, > PAGE_UNLOCK | PAGE_SET_ORDERED); > @@ -2205,7 +2203,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, > > if (ret && cur_offset < end) > extent_clear_unlock_delalloc(inode, cur_offset, end, > - locked_page, EXTENT_LOCKED | > + locked_page, > EXTENT_DELALLOC | EXTENT_DEFRAG | > EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | > PAGE_START_WRITEBACK | > @@ -8223,7 +8221,28 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, > static int btrfs_writepages(struct address_space *mapping, > struct writeback_control *wbc) > { > - return extent_writepages(mapping, wbc); > + u64 start, end; > + struct inode *inode = mapping->host; > + struct extent_state *cached = NULL; > + int ret; > + u64 isize = round_up(i_size_read(inode), PAGE_SIZE) - 1; > + > + if (wbc->range_cyclic) { > + start = mapping->writeback_index << PAGE_SHIFT; > + end = isize; I had to go look, but ->range_cyclic can be used by the background flushing thread, which of course will be single threaded. However we can also trigger this with memcg flushing during balance_dirty_pages(), which means that writeback_index can change. Additionally isize can change arbitrarily between now and extent_writepages. I think you're going to have to change extent_writepages to take the start and end index in order to make sure we're not writing things we haven't locked. Alternatively you could keep track of range_cyclic here, update the wbc with the range_start and range_end that you calculate here, and reset it on the way out. Personally I think it would be cleaner to pass in the start and end into extent_writepages, and move the ->writeback_index update into this function, as well as the logic for start and end. Thanks, Josef
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 92068e4ff9c3..42bae149f923 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -464,15 +464,10 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode, } } - /* step three, lock the state bits for the whole range */ - lock_extent(tree, delalloc_start, delalloc_end, &cached_state); - /* then test to make sure it is all still delalloc */ ret = test_range_bit(tree, delalloc_start, delalloc_end, EXTENT_DELALLOC, 1, cached_state); if (!ret) { - unlock_extent(tree, delalloc_start, delalloc_end, - &cached_state); __unlock_for_delalloc(inode, locked_page, delalloc_start, delalloc_end); cond_resched(); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4bfa51871ddc..92726831dd5d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -992,7 +992,6 @@ static int submit_one_async_extent(struct btrfs_inode *inode, struct async_extent *async_extent, u64 *alloc_hint) { - struct extent_io_tree *io_tree = &inode->io_tree; struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key ins; @@ -1013,7 +1012,6 @@ static int submit_one_async_extent(struct btrfs_inode *inode, if (!(start >= locked_page_end || end <= locked_page_start)) locked_page = async_chunk->locked_page; } - lock_extent(io_tree, start, end, NULL); /* We have fall back to uncompressed write */ if (!async_extent->pages) @@ -1067,7 +1065,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode, /* Clear dirty, set writeback and unlock the pages. */ extent_clear_unlock_delalloc(inode, start, end, - NULL, EXTENT_LOCKED | EXTENT_DELALLOC, + NULL, EXTENT_DELALLOC, PAGE_UNLOCK | PAGE_START_WRITEBACK); if (btrfs_submit_compressed_write(inode, start, /* file_offset */ async_extent->ram_size, /* num_bytes */ @@ -1095,7 +1093,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode, btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); out_free: extent_clear_unlock_delalloc(inode, start, end, - NULL, EXTENT_LOCKED | EXTENT_DELALLOC | + NULL, EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_START_WRITEBACK | @@ -1263,7 +1261,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, */ extent_clear_unlock_delalloc(inode, start, end, locked_page, - EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); @@ -1374,7 +1372,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, extent_clear_unlock_delalloc(inode, start, start + ram_size - 1, locked_page, - EXTENT_LOCKED | EXTENT_DELALLOC, + EXTENT_DELALLOC, page_ops); if (num_bytes < cur_alloc_size) num_bytes = 0; @@ -1425,7 +1423,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, * We process each region below. */ - clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | + clear_bits = EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV; page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK; @@ -1575,7 +1573,7 @@ static int cow_file_range_async(struct btrfs_inode *inode, memalloc_nofs_restore(nofs_flag); if (!ctx) { - unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | + unsigned clear_bits = EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING; unsigned long page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | @@ -1955,7 +1953,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, path = btrfs_alloc_path(); if (!path) { extent_clear_unlock_delalloc(inode, start, end, locked_page, - EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, PAGE_UNLOCK | PAGE_START_WRITEBACK | @@ -2169,7 +2167,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, nocow_args.num_bytes); extent_clear_unlock_delalloc(inode, cur_offset, nocow_end, - locked_page, EXTENT_LOCKED | + locked_page, EXTENT_DELALLOC | EXTENT_CLEAR_DATA_RESV, PAGE_UNLOCK | PAGE_SET_ORDERED); @@ -2205,7 +2203,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, if (ret && cur_offset < end) extent_clear_unlock_delalloc(inode, cur_offset, end, - locked_page, EXTENT_LOCKED | + locked_page, EXTENT_DELALLOC | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_START_WRITEBACK | @@ -8223,7 +8221,28 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, static int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - return extent_writepages(mapping, wbc); + u64 start, end; + struct inode *inode = mapping->host; + struct extent_state *cached = NULL; + int ret; + u64 isize = round_up(i_size_read(inode), PAGE_SIZE) - 1; + + if (wbc->range_cyclic) { + start = mapping->writeback_index << PAGE_SHIFT; + end = isize; + } else { + start = round_down(wbc->range_start, PAGE_SIZE); + end = round_up(wbc->range_end, PAGE_SIZE) - 1; + end = min(isize, end); + } + + if (start >= end) + return 0; + + lock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached); + ret = extent_writepages(mapping, wbc); + unlock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached); + return ret; } static void btrfs_readahead(struct readahead_control *rac)
writepages() locks the extents in find_lock_delalloc_range() and unlocks using clear_bit EXTENT_LOCKED operations is cow/delalloc operations. Call extent locking/unlocking around writepages() sequence as opposed to while performing delayed allocation. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> --- fs/btrfs/extent_io.c | 5 ----- fs/btrfs/inode.c | 43 +++++++++++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 17 deletions(-)