Message ID | 1394634033-2528-2-git-send-email-chandan@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | Deferred |
Headers | show |
On Wed, Mar 12, 2014 at 07:50:28PM +0530, Chandan Rajendra wrote: > bio_vec->{bv_offset, bv_len} cannot be relied upon by the end bio functions > to track the file offset range operated on by the bio. Hence this patch adds > two new members to 'struct btrfs_io_bio' to track the file offset range. > > This patch also brings back check_page_locked() to reliably unlock pages in > readpage's end bio function. > > Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com> > --- > fs/btrfs/extent_io.c | 122 +++++++++++++++++++++++++++++++++------------------ > fs/btrfs/volumes.h | 3 ++ > 2 files changed, 82 insertions(+), 43 deletions(-) > > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c > index fbe501d..5a65aee 100644 > --- a/fs/btrfs/extent_io.c > +++ b/fs/btrfs/extent_io.c > @@ -1943,15 +1943,31 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, > * helper function to set a given page up to date if all the > * extents in the tree for that page are up to date > */ > -static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) > +static void check_page_uptodate(struct extent_io_tree *tree, struct page *page, > + struct extent_state *cached) > { > u64 start = page_offset(page); > u64 end = start + PAGE_CACHE_SIZE - 1; > - if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) > + if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, cached)) > SetPageUptodate(page); > } > > /* > + * helper function to unlock a page if all the extents in the tree > + * for that page are unlocked > + */ > +static void check_page_locked(struct extent_io_tree *tree, struct page *page) > +{ > + u64 start = page_offset(page); > + u64 end = start + PAGE_CACHE_SIZE - 1; > + > + if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) { > + unlock_page(page); > + } > +} > + > + > +/* > * When IO fails, either with EIO or csum verification fails, we > * try other mirrors that might have a good copy of the data. 
This > * io_failure_record is used to record state as we go through all the > @@ -2414,16 +2430,33 @@ static void end_bio_extent_writepage(struct bio *bio, int err) > bio_put(bio); > } > > -static void > -endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, > - int uptodate) > +static void unlock_extent_and_page(struct address_space *mapping, > + struct extent_io_tree *tree, > + struct btrfs_io_bio *io_bio) > { > - struct extent_state *cached = NULL; > - u64 end = start + len - 1; > + pgoff_t index; > + u64 offset, len; > + /* > + * This btrfs_io_bio may span multiple pages. > + * We need to unlock the pages convered by them > + * if we got endio callback for all the blocks in the page. > + * btrfs_io_bio also contain "contigous blocks of the file" > + * look at submit_extent_page for more details. > + */ > > - if (uptodate && tree->track_uptodate) > - set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); > - unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); > + offset = io_bio->start_offset; > + len = io_bio->len; > + unlock_extent(tree, offset, offset + len - 1); > + > + index = offset >> PAGE_CACHE_SHIFT; > + while (offset < io_bio->start_offset + len) { > + struct page *page; > + page = find_get_page(mapping, index); > + check_page_locked(tree, page); > + page_cache_release(page); > + index++; > + offset += PAGE_CACHE_SIZE; > + } > } > > /* > @@ -2443,13 +2476,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err) > struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; > struct bio_vec *bvec = bio->bi_io_vec; > struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); > + struct address_space *mapping = bio->bi_io_vec->bv_page->mapping; > struct extent_io_tree *tree; > + struct extent_state *cached = NULL; > u64 offset = 0; > u64 start; > u64 end; > u64 len; > - u64 extent_start = 0; > - u64 extent_len = 0; > int mirror; > int ret; > > @@ -2482,8 +2515,8 @@ static void end_bio_extent_readpage(struct bio 
*bio, int err) > bvec->bv_offset, bvec->bv_len); > } > > - start = page_offset(page); > - end = start + bvec->bv_offset + bvec->bv_len - 1; > + start = page_offset(page) + bvec->bv_offset; > + end = start + bvec->bv_len - 1; > len = bvec->bv_len; > > if (++bvec <= bvec_end) > @@ -2540,40 +2573,24 @@ readpage_ok: > offset = i_size & (PAGE_CACHE_SIZE-1); > if (page->index == end_index && offset) > zero_user_segment(page, offset, PAGE_CACHE_SIZE); > - SetPageUptodate(page); > + if (tree->track_uptodate) > + set_extent_uptodate(tree, start, end, &cached, > + GFP_ATOMIC); > } else { > ClearPageUptodate(page); > SetPageError(page); > } > - unlock_page(page); > - offset += len; > > - if (unlikely(!uptodate)) { > - if (extent_len) { > - endio_readpage_release_extent(tree, > - extent_start, > - extent_len, 1); > - extent_start = 0; > - extent_len = 0; > - } > - endio_readpage_release_extent(tree, start, > - end - start + 1, 0); > - } else if (!extent_len) { > - extent_start = start; > - extent_len = end + 1 - start; > - } else if (extent_start + extent_len == start) { > - extent_len += end + 1 - start; > - } else { > - endio_readpage_release_extent(tree, extent_start, > - extent_len, uptodate); > - extent_start = start; > - extent_len = end + 1 - start; > - } > + offset += len; > + /* > + * Check whether the page in the bvec can be marked uptodate > + */ > + check_page_uptodate(tree, page, cached); > } while (bvec <= bvec_end); > - > - if (extent_len) > - endio_readpage_release_extent(tree, extent_start, extent_len, > - uptodate); > + /* > + * Unlock the btrfs_bio and associated page > + */ > + unlock_extent_and_page(mapping, tree, io_bio); > if (io_bio->end_io) > io_bio->end_io(io_bio, err); > bio_put(bio); > @@ -2700,6 +2717,18 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, > else > contig = bio_end_sector(bio) == sector; > > + if (contig) { > + /* > + * Check whether we are contig if file offsets. 
> + * We should mostly be for readpage/readpages > + * We need to do this because we use btrfs_io_bio > + * start_offset and len to unlock in endio routines. > + */ > + if ((page_offset(page) + offset) != > + (btrfs_io_bio(bio)->start_offset + > + btrfs_io_bio(bio)->len)) > + contig = 0; > + } > if (prev_bio_flags != bio_flags || !contig || > merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || > bio_add_page(bio, page, page_size, offset) < page_size) { > @@ -2709,6 +2738,11 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, > return ret; > bio = NULL; > } else { > + /* > + * update btrfs_io_bio len. So that we can unlock > + * correctly in end_io callback. > + */ > + btrfs_io_bio(bio)->len += page_size; > return 0; > } > } > @@ -2724,6 +2758,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, > bio_add_page(bio, page, page_size, offset); > bio->bi_end_io = end_io_func; > bio->bi_private = tree; > + btrfs_io_bio(bio)->start_offset = page_offset(page) + offset; > + btrfs_io_bio(bio)->len = page_size; > > if (bio_ret) > *bio_ret = bio; > @@ -2914,7 +2950,7 @@ static int __do_readpage(struct extent_io_tree *tree, > /* the get_extent function already copied into the page */ > if (test_range_bit(tree, cur, cur_end, > EXTENT_UPTODATE, 1, NULL)) { > - check_page_uptodate(tree, page); > + check_page_uptodate(tree, page, NULL); > if (!parent_locked) > unlock_extent(tree, cur, cur + iosize - 1); > cur = cur + iosize; > diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h > index 80754f9..2e16f4d 100644 > --- a/fs/btrfs/volumes.h > +++ b/fs/btrfs/volumes.h > @@ -174,6 +174,9 @@ struct btrfs_io_bio { > u8 *csum_allocated; > btrfs_io_bio_end_io_t *end_io; > struct bio bio; > + /* Track file offset range operated on by the bio.*/ > + u64 start_offset; > + u64 len; These should be put in front of "struct bio bio", otherwise, it might lead to errors, according to bioset_create()'s comments, 
---------------------------------------------------------------------- "Note that the bio must be embedded at the END of that structure always, or things will break badly." ---------------------------------------------------------------------- -liubo -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> These should be put in front of "struct bio bio", > otherwise, it might lead to errors, according to bioset_create()'s comments, > > ---------------------------------------------------------------------- > "Note that the bio must be embedded at the END of that structure always, > or things will break badly." > ---------------------------------------------------------------------- > Thank you for pointing that out. I will fix it. Thanks, chandan -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fbe501d..5a65aee 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1943,15 +1943,31 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, * helper function to set a given page up to date if all the * extents in the tree for that page are up to date */ -static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) +static void check_page_uptodate(struct extent_io_tree *tree, struct page *page, + struct extent_state *cached) { u64 start = page_offset(page); u64 end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) + if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, cached)) SetPageUptodate(page); } /* + * helper function to unlock a page if all the extents in the tree + * for that page are unlocked + */ +static void check_page_locked(struct extent_io_tree *tree, struct page *page) +{ + u64 start = page_offset(page); + u64 end = start + PAGE_CACHE_SIZE - 1; + + if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) { + unlock_page(page); + } +} + + +/* * When IO fails, either with EIO or csum verification fails, we * try other mirrors that might have a good copy of the data. This * io_failure_record is used to record state as we go through all the @@ -2414,16 +2430,33 @@ static void end_bio_extent_writepage(struct bio *bio, int err) bio_put(bio); } -static void -endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, - int uptodate) +static void unlock_extent_and_page(struct address_space *mapping, + struct extent_io_tree *tree, + struct btrfs_io_bio *io_bio) { - struct extent_state *cached = NULL; - u64 end = start + len - 1; + pgoff_t index; + u64 offset, len; + /* + * This btrfs_io_bio may span multiple pages. + * We need to unlock the pages convered by them + * if we got endio callback for all the blocks in the page. 
+ * btrfs_io_bio also contains "contiguous blocks of the file" + * look at submit_extent_page for more details. + */ - if (uptodate && tree->track_uptodate) - set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); - unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); + offset = io_bio->start_offset; + len = io_bio->len; + unlock_extent(tree, offset, offset + len - 1); + + index = offset >> PAGE_CACHE_SHIFT; + while (offset < io_bio->start_offset + len) { + struct page *page; + page = find_get_page(mapping, index); + check_page_locked(tree, page); + page_cache_release(page); + index++; + offset += PAGE_CACHE_SIZE; + } } /* @@ -2443,13 +2476,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err) struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); + struct address_space *mapping = bio->bi_io_vec->bv_page->mapping; struct extent_io_tree *tree; + struct extent_state *cached = NULL; u64 offset = 0; u64 start; u64 end; u64 len; - u64 extent_start = 0; - u64 extent_len = 0; int mirror; int ret; @@ -2482,8 +2515,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err) bvec->bv_offset, bvec->bv_len); } - start = page_offset(page); - end = start + bvec->bv_offset + bvec->bv_len - 1; + start = page_offset(page) + bvec->bv_offset; + end = start + bvec->bv_len - 1; len = bvec->bv_len; if (++bvec <= bvec_end) @@ -2540,40 +2573,24 @@ readpage_ok: offset = i_size & (PAGE_CACHE_SIZE-1); if (page->index == end_index && offset) zero_user_segment(page, offset, PAGE_CACHE_SIZE); - SetPageUptodate(page); + if (tree->track_uptodate) + set_extent_uptodate(tree, start, end, &cached, + GFP_ATOMIC); } else { ClearPageUptodate(page); SetPageError(page); } - unlock_page(page); - offset += len; - if (unlikely(!uptodate)) { - if (extent_len) { - endio_readpage_release_extent(tree, - extent_start, - extent_len, 1); - extent_start = 0; - extent_len = 0; - } - 
endio_readpage_release_extent(tree, start, - end - start + 1, 0); - } else if (!extent_len) { - extent_start = start; - extent_len = end + 1 - start; - } else if (extent_start + extent_len == start) { - extent_len += end + 1 - start; - } else { - endio_readpage_release_extent(tree, extent_start, - extent_len, uptodate); - extent_start = start; - extent_len = end + 1 - start; - } + offset += len; + /* + * Check whether the page in the bvec can be marked uptodate + */ + check_page_uptodate(tree, page, cached); } while (bvec <= bvec_end); - - if (extent_len) - endio_readpage_release_extent(tree, extent_start, extent_len, - uptodate); + /* + * Unlock the btrfs_bio and associated page + */ + unlock_extent_and_page(mapping, tree, io_bio); if (io_bio->end_io) io_bio->end_io(io_bio, err); bio_put(bio); @@ -2700,6 +2717,18 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, else contig = bio_end_sector(bio) == sector; + if (contig) { + /* + * Check whether we are contig if file offsets. + * We should mostly be for readpage/readpages + * We need to do this because we use btrfs_io_bio + * start_offset and len to unlock in endio routines. + */ + if ((page_offset(page) + offset) != + (btrfs_io_bio(bio)->start_offset + + btrfs_io_bio(bio)->len)) + contig = 0; + } if (prev_bio_flags != bio_flags || !contig || merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || bio_add_page(bio, page, page_size, offset) < page_size) { @@ -2709,6 +2738,11 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, return ret; bio = NULL; } else { + /* + * update btrfs_io_bio len. So that we can unlock + * correctly in end_io callback. 
+ */ + btrfs_io_bio(bio)->len += page_size; return 0; } } @@ -2724,6 +2758,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; + btrfs_io_bio(bio)->start_offset = page_offset(page) + offset; + btrfs_io_bio(bio)->len = page_size; if (bio_ret) *bio_ret = bio; @@ -2914,7 +2950,7 @@ static int __do_readpage(struct extent_io_tree *tree, /* the get_extent function already copied into the page */ if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1, NULL)) { - check_page_uptodate(tree, page); + check_page_uptodate(tree, page, NULL); if (!parent_locked) unlock_extent(tree, cur, cur + iosize - 1); cur = cur + iosize; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 80754f9..2e16f4d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -174,6 +174,9 @@ struct btrfs_io_bio { u8 *csum_allocated; btrfs_io_bio_end_io_t *end_io; struct bio bio; + /* Track file offset range operated on by the bio.*/ + u64 start_offset; + u64 len; }; static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
bio_vec->{bv_offset, bv_len} cannot be relied upon by the end bio functions to track the file offset range operated on by the bio. Hence this patch adds two new members to 'struct btrfs_io_bio' to track the file offset range. This patch also brings back check_page_locked() to reliably unlock pages in readpage's end bio function. Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com> --- fs/btrfs/extent_io.c | 122 +++++++++++++++++++++++++++++++++------------------ fs/btrfs/volumes.h | 3 ++ 2 files changed, 82 insertions(+), 43 deletions(-)