@@ -413,7 +413,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
int mirror_num = 0;
int failed_mirror = 0;
- clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
+ clear_bit(EXTENT_BUFFER_CORRUPT, &eb->ebflags);
io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
while (1) {
ret = read_extent_buffer_pages(io_tree, eb, start,
@@ -432,7 +432,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
* there is no reason to read the other copies, they won't be
* any less wrong.
*/
- if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
+ if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->ebflags))
break;
num_copies = btrfs_num_copies(root->fs_info,
@@ -564,12 +564,13 @@ static noinline int check_leaf(struct btrfs_root *root,
return 0;
}
-static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
- u64 phy_offset, struct page *page,
- u64 start, u64 end, int mirror)
+int verify_extent_buffer_read(struct btrfs_io_bio *io_bio,
+ struct page *page,
+ u64 start, u64 end, int mirror)
{
u64 found_start;
int found_level;
+ struct extent_buffer_head *ebh;
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
int ret = 0;
@@ -579,18 +580,26 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
goto out;
eb = (struct extent_buffer *)page->private;
+ do {
+ if ((eb->start <= start) && (eb->start + eb->len - 1 > start))
+ break;
+ } while ((eb = eb->eb_next) != NULL);
+
+ BUG_ON(!eb);
+
+ ebh = eb_head(eb);
/* the pending IO might have been the only thing that kept this buffer
* in memory. Make sure we have a ref for all this other checks
*/
extent_buffer_get(eb);
- reads_done = atomic_dec_and_test(&eb->io_pages);
+ reads_done = atomic_dec_and_test(&ebh->io_bvecs);
if (!reads_done)
goto err;
eb->read_mirror = mirror;
- if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
+ if (test_bit(EXTENT_BUFFER_IOERR, &eb->ebflags)) {
ret = -EIO;
goto err;
}
@@ -632,7 +641,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
* return -EIO.
*/
if (found_level == 0 && check_leaf(root, eb)) {
- set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
+ set_bit(EXTENT_BUFFER_CORRUPT, &eb->ebflags);
ret = -EIO;
}
@@ -640,7 +649,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
set_extent_buffer_uptodate(eb);
err:
if (reads_done &&
- test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
+ test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->ebflags))
btree_readahead_hook(root, eb, eb->start, ret);
if (ret) {
@@ -649,7 +658,7 @@ err:
* again, we have to make sure it has something
* to decrement
*/
- atomic_inc(&eb->io_pages);
+ atomic_inc(&eb_head(eb)->io_bvecs);
clear_extent_buffer_uptodate(eb);
}
free_extent_buffer(eb);
@@ -657,20 +666,6 @@ out:
return ret;
}
-static int btree_io_failed_hook(struct page *page, int failed_mirror)
-{
- struct extent_buffer *eb;
- struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
-
- eb = (struct extent_buffer *)page->private;
- set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
- eb->read_mirror = failed_mirror;
- atomic_dec(&eb->io_pages);
- if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
- btree_readahead_hook(root, eb, eb->start, -EIO);
- return -EIO; /* we fixed nothing */
-}
-
static void end_workqueue_bio(struct bio *bio, int err)
{
struct end_io_wq *end_io_wq = bio->bi_private;
@@ -4109,8 +4104,6 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
}
static struct extent_io_ops btree_extent_io_ops = {
- .readpage_end_io_hook = btree_readpage_end_io_hook,
- .readpage_io_failed_hook = btree_io_failed_hook,
.submit_bio_hook = btree_submit_bio_hook,
/* note we're sharing with inode.c for the merge bio hook */
.merge_bio_hook = btrfs_merge_bio_hook,
@@ -110,6 +110,9 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root)
kfree(root);
}
+int verify_extent_buffer_read(struct btrfs_io_bio *io_bio,
+ struct page *page,
+ u64 start, u64 end, int mirror);
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
@@ -14,6 +14,7 @@
#include "extent_io.h"
#include "extent_map.h"
#include "ctree.h"
+#include "disk-io.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
@@ -2120,7 +2121,7 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
for (i = 0; i < num_pages; i++) {
struct page *p = extent_buffer_page(eb, i);
- ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
+ ret = repair_io_failure(root->fs_info, start, eb->len,
start, p, mirror_num);
if (ret)
break;
@@ -3551,17 +3552,88 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
num_pages = num_extent_pages(eb->start, eb->len);
for (i = 0; i < num_pages; i++) {
struct page *p = extent_buffer_page(eb, i);
+static void end_bio_extent_buffer_readpage(struct bio *bio, int err)
+{
+ struct address_space *mapping = bio->bi_io_vec->bv_page->mapping;
+ struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
+ struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+ struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct bio_vec *bvec = bio->bi_io_vec;
+ struct extent_buffer *eb;
+ struct page *page = bvec->bv_page;
+ struct btrfs_root *root;
+ int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ u64 start;
+ u64 end;
+ int mirror;
+ int ret;
- if (!trylock_page(p)) {
- if (!flush) {
- flush_write_bio(epd);
- flush = 1;
- }
- lock_page(p);
+ root = BTRFS_I(page->mapping->host)->root;
+
+ if (err)
+ uptodate = 0;
+
+ do {
+ page = bvec->bv_page;
+
+ if (!page->private) {
+ SetPageUptodate(page);
+ goto unlock;
}
- }
- return ret;
+ eb = (struct extent_buffer *)page->private;
+
+ start = io_bio->start_offset;
+ end = start + io_bio->len - 1;
+
+ do {
+ /*
+ read_extent_buffer_pages() does not start
+ I/O on PG_uptodate pages. Hence the bio may
+ map only part of the extent buffer.
+ */
+ if ((eb->start <= start) && (eb->start + eb->len - 1 > start))
+ break;
+ } while ((eb = eb->eb_next) != NULL);
+
+ BUG_ON(!eb);
+
+ mirror = io_bio->mirror_num;
+
+ if (uptodate) {
+ ret = verify_extent_buffer_read(io_bio, page, start,
+ end, mirror);
+ if (ret)
+ uptodate = 0;
+ }
+
+ if (!uptodate) {
+ set_bit(EXTENT_BUFFER_IOERR, &eb->ebflags);
+ eb->read_mirror = mirror;
+ atomic_dec(&eb_head(eb)->io_bvecs);
+ if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
+ &eb->ebflags))
+ btree_readahead_hook(root, eb, eb->start,
+ -EIO);
+ ClearPageUptodate(page);
+ SetPageError(page);
+ goto unlock;
+ }
+
+unlock:
+ unlock_page(page);
+ ++bvec;
+ } while (bvec <= bvec_end);
+
+ /*
+ * There is no need to check whether
+ * extent_io_tree->track_uptodate is set, since
+ * this function only deals with extent buffers.
+ */
+ unlock_extent(tree, io_bio->start_offset,
+ io_bio->start_offset + io_bio->len - 1);
+
+ bio_put(bio);
}
static void end_extent_buffer_writeback(struct extent_buffer *eb)
@@ -5064,6 +5136,9 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, u64 start, int wait,
get_extent_t *get_extent, int mirror_num)
{
+ struct inode *inode = tree->mapping->host;
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct extent_state *cached_state = NULL;
unsigned long i;
unsigned long start_i;
struct page *page;
@@ -5076,7 +5151,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
struct bio *bio = NULL;
unsigned long bio_flags = 0;
- if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
+ if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->ebflags))
return 0;
if (start) {
@@ -5104,21 +5179,34 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
}
if (all_uptodate) {
if (start_i == 0)
- set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+ set_bit(EXTENT_BUFFER_UPTODATE, &eb->ebflags);
goto unlock_exit;
}
- clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+ clear_bit(EXTENT_BUFFER_IOERR, &eb->ebflags);
eb->read_mirror = 0;
- atomic_set(&eb->io_pages, num_reads);
+ atomic_set(&eb_head(eb)->io_bvecs, num_reads);
+ lock_extent_bits(tree, eb->start, eb->start + eb->len - 1, 0,
+ &cached_state);
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
if (!PageUptodate(page)) {
ClearPageError(page);
- err = __extent_read_full_page(tree, page,
- get_extent, &bio,
- mirror_num, &bio_flags,
- READ | REQ_META);
+ if (eb->len < PAGE_CACHE_SIZE) {
+ err = submit_extent_page(READ | REQ_META, tree,
+ page, eb->start >> 9,
+ eb->len, eb->start - page_offset(page),
+ fs_info->fs_devices->latest_bdev,
+ &bio, -1, end_bio_extent_buffer_readpage,
+ mirror_num, bio_flags, bio_flags);
+ } else {
+ err = submit_extent_page(READ | REQ_META, tree,
+ page, page_offset(page) >> 9,
+ PAGE_CACHE_SIZE, 0,
+ fs_info->fs_devices->latest_bdev,
+ &bio, -1, end_bio_extent_buffer_readpage,
+ mirror_num, bio_flags, bio_flags);
+ }
if (err)
ret = err;
} else {
@@ -5136,11 +5224,18 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
if (ret || wait != WAIT_COMPLETE)
return ret;
- for (i = start_i; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
+ if (eb->len < PAGE_CACHE_SIZE) {
+ page = extent_buffer_page(eb, 0);
wait_on_page_locked(page);
- if (!PageUptodate(page))
+ if (!extent_buffer_uptodate(eb))
ret = -EIO;
+ } else {
+ for (i = start_i; i < num_pages; i++) {
+ page = extent_buffer_page(eb, i);
+ wait_on_page_locked(page);
+ if (!PageUptodate(page))
+ ret = -EIO;
+ }
}
return ret;
In the subpagesize-blocksize case (metadata block size smaller than the
page size), this patch makes it possible to read a single metadata block
from disk instead of every metadata block that maps into the page.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
---
 fs/btrfs/disk-io.c   |  45 ++++++++---------
 fs/btrfs/disk-io.h   |   3 ++
 fs/btrfs/extent_io.c | 135 +++++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 137 insertions(+), 46 deletions(-)