@@ -4051,6 +4051,7 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
static void extent_buffer_read_end_io(struct btrfs_bio *bbio)
{
struct extent_buffer *eb = bbio->private;
+ struct btrfs_fs_info *fs_info = eb->fs_info;
bool uptodate = !bbio->bio.bi_status;
struct bvec_iter_all iter_all;
struct bio_vec *bvec;
@@ -4070,26 +4071,49 @@ static void extent_buffer_read_end_io(struct btrfs_bio *bbio)
}
bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
- end_page_read(bvec->bv_page, uptodate, eb->start + bio_offset,
- bvec->bv_len);
+ u64 start = eb->start + bio_offset;
+ struct page *page = bvec->bv_page;
+ u32 len = bvec->bv_len;
+
+ if (uptodate)
+ btrfs_page_set_uptodate(fs_info, page, start, len);
+ else
+ btrfs_page_clear_uptodate(fs_info, page, start, len);
+
bio_offset += bvec->bv_len;
}
- if (eb->fs_info->nodesize < PAGE_SIZE) {
- unlock_extent(&bbio->inode->io_tree, eb->start,
- eb->start + bio_offset - 1, NULL);
- }
+ clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
free_extent_buffer(eb);
bio_put(&bbio->bio);
}
-static void __read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num,
- struct btrfs_tree_parent_check *check)
+int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
+ struct btrfs_tree_parent_check *check)
{
int num_pages = num_extent_pages(eb), i;
struct btrfs_bio *bbio;
+ if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
+ return 0;
+
+ /*
+ * We could have had EXTENT_BUFFER_UPTODATE cleared by the write
+ * operation, which could potentially still be in flight. In this case
+ * we simply want to return an error.
+ */
+ if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)))
+ return -EIO;
+
+ /*
+ * Someone else is already reading the buffer, just wait for it.
+ */
+ if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags))
+ goto done;
+
clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0;
check_buffer_tree_ref(eb);
@@ -4110,117 +4134,16 @@ static void __read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num,
__bio_add_page(&bbio->bio, eb->pages[i], PAGE_SIZE, 0);
}
btrfs_submit_bio(bbio, mirror_num);
-}
-
-static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
- int mirror_num,
- struct btrfs_tree_parent_check *check)
-{
- struct btrfs_fs_info *fs_info = eb->fs_info;
- struct extent_io_tree *io_tree;
- struct page *page = eb->pages[0];
- struct extent_state *cached_state = NULL;
- int ret;
- ASSERT(!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags));
- ASSERT(PagePrivate(page));
- ASSERT(check);
- io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
-
- if (wait == WAIT_NONE) {
- if (!try_lock_extent(io_tree, eb->start, eb->start + eb->len - 1,
- &cached_state))
- return -EAGAIN;
- } else {
- ret = lock_extent(io_tree, eb->start, eb->start + eb->len - 1,
- &cached_state);
- if (ret < 0)
- return ret;
- }
-
- if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) {
- unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
- &cached_state);
- return 0;
- }
-
- btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);
-
- __read_extent_buffer_pages(eb, mirror_num, check);
- if (wait != WAIT_COMPLETE) {
- free_extent_state(cached_state);
- return 0;
- }
-
- wait_extent_bit(io_tree, eb->start, eb->start + eb->len - 1,
- EXTENT_LOCKED, &cached_state);
- if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
- return -EIO;
- return 0;
-}
-
-int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
- struct btrfs_tree_parent_check *check)
-{
- int i;
- struct page *page;
- int locked_pages = 0;
- int num_pages;
-
- if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
- return 0;
-
- /*
- * We could have had EXTENT_BUFFER_UPTODATE cleared by the write
- * operation, which could potentially still be in flight. In this case
- * we simply want to return an error.
- */
- if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)))
- return -EIO;
-
- if (eb->fs_info->nodesize < PAGE_SIZE)
- return read_extent_buffer_subpage(eb, wait, mirror_num, check);
-
- num_pages = num_extent_pages(eb);
- for (i = 0; i < num_pages; i++) {
- page = eb->pages[i];
- if (wait == WAIT_NONE) {
- /*
- * WAIT_NONE is only utilized by readahead. If we can't
- * acquire the lock atomically it means either the eb
- * is being read out or under modification.
- * Either way the eb will be or has been cached,
- * readahead can exit safely.
- */
- if (!trylock_page(page))
- goto unlock_exit;
- } else {
- lock_page(page);
- }
- locked_pages++;
- }
-
- __read_extent_buffer_pages(eb, mirror_num, check);
-
- if (wait != WAIT_COMPLETE)
- return 0;
-
- for (i = 0; i < num_pages; i++) {
- page = eb->pages[i];
- wait_on_page_locked(page);
- if (!PageUptodate(page))
+done:
+ if (wait == WAIT_COMPLETE) {
+ wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING,
+ TASK_UNINTERRUPTIBLE);
+ if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return -EIO;
}
return 0;
-
-unlock_exit:
- while (locked_pages > 0) {
- locked_pages--;
- page = eb->pages[locked_pages];
- unlock_page(page);
- }
- return 0;
}
static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
@@ -29,6 +29,7 @@ enum {
/* write IO error */
EXTENT_BUFFER_WRITE_ERR,
EXTENT_BUFFER_NO_CHECK,
+ EXTENT_BUFFER_READING,
};
/* these are flags for __process_pages_contig */
Instead of locking and unlocking every page or the extent, just add a new EXTENT_BUFFER_READING bit that mirrors EXTENT_BUFFER_WRITEBACK for synchronizing threads trying to read an extent_buffer and to wait for I/O completion. Signed-off-by: Christoph Hellwig <hch@lst.de> --- fs/btrfs/extent_io.c | 151 +++++++++++-------------------------------- fs/btrfs/extent_io.h | 1 + 2 files changed, 38 insertions(+), 114 deletions(-)