From patchwork Mon Jan 25 14:01:12 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: jim owens X-Patchwork-Id: 75048 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.3/8.14.2) with ESMTP id o0PE1J8m005735 for ; Mon, 25 Jan 2010 14:01:20 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752674Ab0AYOBQ (ORCPT ); Mon, 25 Jan 2010 09:01:16 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1752707Ab0AYOBP (ORCPT ); Mon, 25 Jan 2010 09:01:15 -0500 Received: from g4t0017.houston.hp.com ([15.201.24.20]:48446 "EHLO g4t0017.houston.hp.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752641Ab0AYOBO (ORCPT ); Mon, 25 Jan 2010 09:01:14 -0500 Received: from g4t0009.houston.hp.com (g4t0009.houston.hp.com [16.234.32.26]) by g4t0017.houston.hp.com (Postfix) with ESMTP id F274B384A2; Mon, 25 Jan 2010 14:01:13 +0000 (UTC) Received: from ldl (ldl.fc.hp.com [15.11.146.30]) by g4t0009.houston.hp.com (Postfix) with ESMTP id D55AAC033; Mon, 25 Jan 2010 14:01:13 +0000 (UTC) Received: from localhost (ldl.fc.hp.com [127.0.0.1]) by ldl (Postfix) with ESMTP id B7CDCCF0013; Mon, 25 Jan 2010 07:01:13 -0700 (MST) Received: from ldl ([127.0.0.1]) by localhost (ldl.fc.hp.com [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 3ARzt6KdzLQa; Mon, 25 Jan 2010 07:01:13 -0700 (MST) Received: from [192.168.0.99] (squirrel.fc.hp.com [15.11.146.57]) (Authenticated sender: owens@fc.hp.com) by ldl (Postfix) with ESMTPA id E001CCF0010; Mon, 25 Jan 2010 07:01:12 -0700 (MST) Message-ID: <4B5DA428.3020109@hp.com> Date: Mon, 25 Jan 2010 09:01:12 -0500 From: jim owens User-Agent: Thunderbird 2.0.0.23 (X11/20090817) MIME-Version: 1.0 To: linux-btrfs , Chris Mason , Josef Bacik Subject: [PATCH] Btrfs: correct mistakes in direct I/O read found by fsx Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org diff --git a/fs/btrfs/dio.c b/fs/btrfs/dio.c index 2c0579a..3315cc9 100644 --- a/fs/btrfs/dio.c +++ b/fs/btrfs/dio.c @@ -203,7 +203,7 @@ static void btrfs_dio_submit_bio(struct btrfs_dio_extcb *extcb, int dvn); static int btrfs_dio_add_user_pages(u64 *dev_left, struct btrfs_dio_extcb *extcb, int dvn); static int btrfs_dio_add_temp_pages(u64 *dev_left, struct btrfs_dio_extcb *extcb, int dvn); static int btrfs_dio_hole_read(struct btrfs_diocb *diocb, u64 hole_len); -static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 data_len); +static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 *data_len); static int btrfs_dio_read_csum(struct btrfs_dio_extcb *extcb); static void btrfs_dio_free_retry(struct btrfs_dio_extcb *extcb); static int btrfs_dio_retry_block(struct btrfs_dio_extcb *extcb); @@ -433,9 +433,21 @@ static void btrfs_dio_read(struct btrfs_diocb *diocb) /* expand lock region to include what we read to validate checksum */ diocb->lockstart = diocb->start & ~(diocb->blocksize-1); + diocb->lockend = ALIGN(diocb->terminate, diocb->blocksize) - 1; getlock: mutex_lock(&diocb->inode->i_mutex); + + /* ensure writeout and btree update on everything + * we might read for checksum or compressed extents + */ + data_len = diocb->lockend + 1 - diocb->lockstart; + err = btrfs_wait_ordered_range(diocb->inode, diocb->lockstart, data_len); + if (err) { + diocb->error = err; + mutex_unlock(&diocb->inode->i_mutex); + return; + } data_len = i_size_read(diocb->inode); if (data_len < end) end = data_len; @@ -448,17 +460,7 @@ getlock: diocb->terminate = end; diocb->lockend = ALIGN(diocb->terminate, diocb->blocksize) - 1; } - - /* ensure writeout and btree update on everything - * we might read for checksum or compressed extents - */ - data_len = diocb->lockend + 1 - diocb->lockstart; - err = btrfs_wait_ordered_range(diocb->inode, diocb->lockstart, data_len); - if (err) { - diocb->error = err; - mutex_unlock(&diocb->inode->i_mutex); - return; - } + lock_extent(io_tree, diocb->lockstart, diocb->lockend, GFP_NOFS); mutex_unlock(&diocb->inode->i_mutex); @@ -483,7 +485,18 @@ getlock: } if (em->block_start == EXTENT_MAP_INLINE) { - err = btrfs_dio_inline_read(diocb, len); + /* ugly stuff because inline can exist in a large file + * with other extents if a hole immediately follows. + * the inline might end short of the btrfs block with + * an implied hole that we need to zero here. + */ + u64 expected = min(diocb->start + len, em->start + em->len); + err = btrfs_dio_inline_read(diocb, &len); + if (!err && expected > diocb->start) { + data_len -= len; + len = expected - diocb->start; + err = btrfs_dio_hole_read(diocb, len); + } } else { len = min(len, em->len - (diocb->start - em->start)); if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || @@ -1183,9 +1196,24 @@ static void btrfs_dio_decompress(struct btrfs_dio_extcb *extcb) u32 len = extcb->icb.out_len; extcb->error = btrfs_zlib_inflate(&extcb->icb); - if (extcb->icb.out_len != len && !extcb->error) - extcb->error = -EIO; + /* ugly again - compressed extents can end with an implied hole */ + if (!extcb->error && extcb->icb.out_len != len) { + while (extcb->umc.todo) { + struct bio_vec uv; + char *out; + + extcb->error = btrfs_dio_get_user_bvec(&uv, &extcb->umc); + if (extcb->error) + goto fail; + out = kmap_atomic(uv.bv_page, KM_USER0); + memset(out + uv.bv_offset, 0, uv.bv_len); + kunmap_atomic(out, KM_USER0); + + btrfs_dio_done_with_out(&uv, NULL); + } + } +fail: btrfs_dio_release_bios(extcb, 0); } @@ -1432,7 +1460,7 @@ fail: return err; } -static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 data_len) +static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 *data_len) { int err; size_t size; @@ -1452,8 +1480,11 @@ static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 data_len) if (err < 0) goto notfound; err= -EDOM; - if (path->slots[0] == 0) + if (path->slots[0] == 0) { + printk(KERN_ERR "btrfs directIO inline extent leaf not found ino %lu\n", + diocb->inode->i_ino); goto fail; + } path->slots[0]--; } @@ -1473,16 +1504,26 @@ static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 data_len) extent_start = found_key.offset; /* uncompressed size */ size = btrfs_file_extent_inline_len(leaf, item); - if (diocb->start < extent_start || diocb->start >= extent_start + size) { - printk(KERN_ERR "btrfs directIO inline extent leaf mismatch ino %lu\n", - diocb->inode->i_ino); + if (diocb->start < extent_start) { + printk(KERN_ERR "btrfs directIO inline extent range mismatch ino %lu" + " fpos %lld found start %lld size %ld\n", + diocb->inode->i_ino,diocb->start,extent_start,size); err= -EDOM; goto fail; } + /* we can end here when we start in an implied hole on a larger file */ + if (diocb->start >= extent_start + size) { + *data_len = 0; + err = 0; + goto fail; + } + extent_offset = diocb->start - extent_start; + size = min_t(u64, *data_len, size - extent_offset); - size = min_t(u64, data_len, size); + size = min_t(u64, *data_len, size); + *data_len = size; if (btrfs_file_extent_compression(leaf, item) == BTRFS_COMPRESS_ZLIB) { @@ -1523,11 +1564,11 @@ static int btrfs_dio_inline_read(struct btrfs_diocb *diocb, u64 data_len) if (!err) diocb->start += size; - /* needed if we ever allowed extents after inline - * diocb->umc.work_iov = extcb->umc.work_iov; - * diocb->umc.user_iov = extcb->umc.user_iov; - * diocb->umc.remaining = extcb->umc.remaining; - */ + /* we allow extents after inline if a hole follows */ + diocb->umc.work_iov = extcb->umc.work_iov; + diocb->umc.user_iov = extcb->umc.user_iov; + diocb->umc.remaining = extcb->umc.remaining; + kfree(extcb); } else { unsigned long inline_start; @@ -1556,9 +1597,11 @@ fail: btrfs_release_path(root, path); notfound: btrfs_free_path(path); - unlock_extent(&BTRFS_I(diocb->inode)->io_tree, diocb->lockstart, - diocb->lockstart + data_len - 1, GFP_NOFS); - diocb->lockstart += data_len; + if (!err && *data_len) { + unlock_extent(&BTRFS_I(diocb->inode)->io_tree, diocb->lockstart, + diocb->lockstart + *data_len - 1, GFP_NOFS); + diocb->lockstart += *data_len; + } return err; }