From patchwork Tue Jun 14 02:32:48 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jim Rees X-Patchwork-Id: 877502 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter2.kernel.org (8.14.4/8.14.4) with ESMTP id p5E2W3Ms029514 for ; Tue, 14 Jun 2011 02:32:51 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754165Ab1FNCcv (ORCPT ); Mon, 13 Jun 2011 22:32:51 -0400 Received: from merit-proxy02.merit.edu ([207.75.116.194]:52033 "EHLO merit-proxy02.merit.edu" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754060Ab1FNCcu (ORCPT ); Mon, 13 Jun 2011 22:32:50 -0400 Received: from localhost (localhost.localdomain [127.0.0.1]) by merit-proxy02.merit.edu (Postfix) with ESMTP id 6BD8B2039CF4; Mon, 13 Jun 2011 22:32:50 -0400 (EDT) X-Virus-Scanned: amavisd-new at merit-proxy02.merit.edu Received: from merit-proxy02.merit.edu ([127.0.0.1]) by localhost (merit-proxy02.merit.edu [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id edywYwHYutE3; Mon, 13 Jun 2011 22:32:49 -0400 (EDT) Received: from merit.edu (74-126-0-171.static.123.net [74.126.0.171]) by merit-proxy02.merit.edu (Postfix) with ESMTPSA id 69B552039CEF; Mon, 13 Jun 2011 22:32:49 -0400 (EDT) X-Mailbox-Line: From 2bf0b7bafe8cf872ca215dc3fb3c62f902a78299 Mon Sep 17 00:00:00 2001 Message-Id: <2bf0b7bafe8cf872ca215dc3fb3c62f902a78299.1308017749.git.rees@umich.edu> In-Reply-To: References: Subject: [PATCH 19/33] pnfsblock: xdr decode pnfs_block_layout4 To: Benny Halevy Cc: linux-nfs@vger.kernel.org, peter honeyman Date: Mon, 13 Jun 2011 22:32:48 -0400 From: Jim Rees Sender: linux-nfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter2.kernel.org [140.211.167.43]); Tue, 14 Jun 2011 02:32:51 +0000 (UTC) From: Fred Isaman XDR decodes the block layout payload sent in LAYOUTGET result, storing the result in an extent list. Signed-off-by: Fred Isaman [pnfsblock: fix bug getting pnfs_layout_type in translate_devid().] Signed-off-by: Tao Guo Signed-off-by: Benny Halevy --- fs/nfs/blocklayout/blocklayoutdev.c | 191 ++++++++++++++++++++++++++++++++++- 1 files changed, 189 insertions(+), 2 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index 0fedf50..a90eb6b 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -150,10 +150,197 @@ out_err: return NULL; } +/* Map deviceid returned by the server to constructed block_device */ +static struct block_device *translate_devid(struct pnfs_layout_hdr *lo, + struct nfs4_deviceid *id) +{ + struct block_device *rv = NULL; + struct block_mount_id *mid; + struct pnfs_block_dev *dev; + + dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id); + mid = BLK_ID(lo); + spin_lock(&mid->bm_lock); + list_for_each_entry(dev, &mid->bm_devlist, bm_node) { + if (memcmp(id->data, dev->bm_mdevid.data, + NFS4_DEVICEID4_SIZE) == 0) { + rv = dev->bm_mdev; + goto out; + } + } + out: + spin_unlock(&mid->bm_lock); + dprintk("%s returning %p\n", __func__, rv); + return rv; +} + +/* Tracks info needed to ensure extents in layout obey constraints of spec */ +struct layout_verification { + u32 mode; /* R or RW */ + u64 start; /* Expected start of next non-COW extent */ + u64 inval; /* Start of INVAL coverage */ + u64 cowread; /* End of COW read coverage */ +}; + +/* Verify the extent meets the layout requirements of the pnfs-block draft, + * section 2.3.1. + */ +static int verify_extent(struct pnfs_block_extent *be, + struct layout_verification *lv) +{ + if (lv->mode == IOMODE_READ) { + if (be->be_state == PNFS_BLOCK_READWRITE_DATA || + be->be_state == PNFS_BLOCK_INVALID_DATA) + return -EIO; + if (be->be_f_offset != lv->start) + return -EIO; + lv->start += be->be_length; + return 0; + } + /* lv->mode == IOMODE_RW */ + if (be->be_state == PNFS_BLOCK_READWRITE_DATA) { + if (be->be_f_offset != lv->start) + return -EIO; + if (lv->cowread > lv->start) + return -EIO; + lv->start += be->be_length; + lv->inval = lv->start; + return 0; + } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) { + if (be->be_f_offset != lv->start) + return -EIO; + lv->start += be->be_length; + return 0; + } else if (be->be_state == PNFS_BLOCK_READ_DATA) { + if (be->be_f_offset > lv->start) + return -EIO; + if (be->be_f_offset < lv->inval) + return -EIO; + if (be->be_f_offset < lv->cowread) + return -EIO; + /* It looks like you might want to min this with lv->start, + * but you really don't. + */ + lv->inval = lv->inval + be->be_length; + lv->cowread = be->be_f_offset + be->be_length; + return 0; + } else + return -EIO; +} + +/* XDR decode pnfs_block_layout4 structure */ int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags) { - /* STUB */ - return -EIO; + struct pnfs_block_layout *bl = BLK_LO2EXT(lo); + int i, status = -EIO; + uint32_t count; + struct pnfs_block_extent *be = NULL, *save; + struct xdr_stream stream; + struct xdr_buf buf; + struct page *scratch; + __be32 *p; + uint64_t tmp; /* Used by READSECTOR */ + struct layout_verification lv = { + .mode = lgr->range.iomode, + .start = lgr->range.offset >> 9, + .inval = lgr->range.offset >> 9, + .cowread = lgr->range.offset >> 9, + }; + LIST_HEAD(extents); + + dprintk("---> %s\n", __func__); + + scratch = alloc_page(gfp_flags); + if (!scratch) + return -ENOMEM; + + xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + + p = xdr_inline_decode(&stream, 4); + if (unlikely(!p)) + goto out_err; + + READ32(count); + + dprintk("%s enter, number of extents %i\n", __func__, count); + p = xdr_inline_decode(&stream, (28 + NFS4_DEVICEID4_SIZE) * count); + if (unlikely(!p)) + goto out_err; + + /* Decode individual extents, putting them in temporary + * staging area until whole layout is decoded to make error + * recovery easier. + */ + for (i = 0; i < count; i++) { + be = alloc_extent(); + if (!be) { + status = -ENOMEM; + goto out_err; + } + READ_DEVID(&be->be_devid); + be->be_mdev = translate_devid(lo, &be->be_devid); + if (!be->be_mdev) + goto out_err; + + /* The next three values are read in as bytes, + * but stored as 512-byte sector lengths + */ + READ_SECTOR(be->be_f_offset); + READ_SECTOR(be->be_length); + READ_SECTOR(be->be_v_offset); + READ32(be->be_state); + if (be->be_state == PNFS_BLOCK_INVALID_DATA) + be->be_inval = &bl->bl_inval; + if (verify_extent(be, &lv)) { + dprintk("%s verify failed\n", __func__); + goto out_err; + } + list_add_tail(&be->be_node, &extents); + } + if (lgr->range.offset + lgr->range.length != lv.start << 9) { + dprintk("%s Final length mismatch\n", __func__); + be = NULL; + goto out_err; + } + if (lv.start < lv.cowread) { + dprintk("%s Final uncovered COW extent\n", __func__); + be = NULL; + goto out_err; + } + /* Extents decoded properly, now try to merge them in to + * existing layout extents. + */ + spin_lock(&bl->bl_ext_lock); + list_for_each_entry_safe(be, save, &extents, be_node) { + list_del(&be->be_node); + status = add_and_merge_extent(bl, be); + if (status) { + spin_unlock(&bl->bl_ext_lock); + /* This is a fairly catastrophic error, as the + * entire layout extent lists are now corrupted. + * We should have some way to distinguish this. + */ + be = NULL; + goto out_err; + } + } + spin_unlock(&bl->bl_ext_lock); + status = 0; + out: + __free_page(scratch); + dprintk("%s returns %i\n", __func__, status); + return status; + + out_err: + put_extent(be); + while (!list_empty(&extents)) { + be = list_first_entry(&extents, struct pnfs_block_extent, + be_node); + list_del(&be->be_node); + put_extent(be); + } + goto out; }