@@ -142,6 +142,7 @@ struct pnfs_block_layout {
sector_t bl_blocksize; /* Server blocksize in sectors */
};
+#define BLK_ID(lo) ((struct block_mount_id *)(PNFS_MOUNTID(lo)->mountid))
#define BLK_LSEG2EXT(lseg) ((struct pnfs_block_layout *)lseg->layout->ld_data)
#define BLK_LO2EXT(lo) ((struct pnfs_block_layout *)lo->ld_data)
@@ -195,4 +196,5 @@ int nfs4_blk_flatten(struct pnfs_blk_volume *, int, struct pnfs_block_dev *);
void free_block_dev(struct pnfs_block_dev *bdev);
/* extents.c */
void put_extent(struct pnfs_block_extent *be);
+struct pnfs_block_extent *alloc_extent(void);
#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
@@ -554,11 +554,172 @@ nfs4_blk_decode_device(struct super_block *sb,
return rv;
}
+/* Map deviceid returned by the server to constructed block_device */
+static struct block_device *translate_devid(struct pnfs_layout_type *lo,
+ struct pnfs_deviceid *id)
+{
+ struct block_device *rv = NULL;
+ struct block_mount_id *mid;
+ struct pnfs_block_dev *dev;
+
+ dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id);
+ mid = BLK_ID(lo);
+ spin_lock(&mid->bm_lock);
+ list_for_each_entry(dev, &mid->bm_devlist, bm_node) {
+ if (memcmp(id->data, dev->bm_mdevid.data,
+ NFS4_PNFS_DEVICEID4_SIZE) == 0) {
+ rv = dev->bm_mdev;
+ goto out;
+ }
+ }
+ out:
+ spin_unlock(&mid->bm_lock);
+ dprintk("%s returning %p\n", __func__, rv);
+ return rv;
+}
+
+/* Tracks info needed to ensure extents in layout obey constraints of spec */
+struct layout_verification {
+ u32 mode; /* R or RW */
+ u64 start; /* Expected start of next non-COW extent */
+ u64 inval; /* Start of INVAL coverage */
+ u64 cowread; /* End of COW read coverage */
+};
+
+/* Verify the extent meets the layout requirements of the pnfs-block draft,
+ * section 2.3.1.
+ */
+static int verify_extent(struct pnfs_block_extent *be,
+ struct layout_verification *lv)
+{
+ if (lv->mode == IOMODE_READ) {
+ if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
+ be->be_state == PNFS_BLOCK_INVALID_DATA)
+ return -EIO;
+ if (be->be_f_offset != lv->start)
+ return -EIO;
+ lv->start += be->be_length;
+ return 0;
+ }
+ /* lv->mode == IOMODE_RW */
+ if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
+ if (be->be_f_offset != lv->start)
+ return -EIO;
+ if (lv->cowread > lv->start)
+ return -EIO;
+ lv->start += be->be_length;
+ lv->inval = lv->start;
+ return 0;
+ } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ if (be->be_f_offset != lv->start)
+ return -EIO;
+ lv->start += be->be_length;
+ return 0;
+ } else if (be->be_state == PNFS_BLOCK_READ_DATA) {
+ if (be->be_f_offset > lv->start)
+ return -EIO;
+ if (be->be_f_offset < lv->inval)
+ return -EIO;
+ if (be->be_f_offset < lv->cowread)
+ return -EIO;
+ /* It looks like you might want to min this with lv->start,
+ * but you really don't.
+ */
+ lv->inval = lv->inval + be->be_length;
+ lv->cowread = be->be_f_offset + be->be_length;
+ return 0;
+ } else
+ return -EIO;
+}
+
/* XDR decode pnfs_block_layout4 structure */
int
nfs4_blk_process_layoutget(struct pnfs_layout_type *lo,
struct nfs4_pnfs_layoutget_res *lgr)
{
- /* STUB */
- return -EIO;
+ struct pnfs_block_layout *bl = PNFS_LD_DATA(lo);
+ uint32_t *p = (uint32_t *)lgr->layout.buf;
+ uint32_t *end = (uint32_t *)((char *)lgr->layout.buf + lgr->layout.len);
+ int i, status = -EIO;
+ uint32_t count;
+ struct pnfs_block_extent *be = NULL;
+ uint64_t tmp; /* Used by READSECTOR */
+ struct layout_verification lv = {
+ .mode = lgr->lseg.iomode,
+ .start = lgr->lseg.offset >> 9,
+ .inval = lgr->lseg.offset >> 9,
+ .cowread = lgr->lseg.offset >> 9,
+ };
+
+ LIST_HEAD(extents);
+
+ BLK_READBUF(p, end, 4);
+ READ32(count);
+
+ dprintk("%s enter, number of extents %i\n", __func__, count);
+ BLK_READBUF(p, end, (28 + NFS4_PNFS_DEVICEID4_SIZE) * count);
+
+ /* Decode individual extents, putting them in temporary
+ * staging area until whole layout is decoded to make error
+ * recovery easier.
+ */
+ for (i = 0; i < count; i++) {
+ be = alloc_extent();
+ if (!be) {
+ status = -ENOMEM;
+ goto out_err;
+ }
+ READ_DEVID(&be->be_devid);
+ be->be_mdev = translate_devid(lo, &be->be_devid);
+ if (!be->be_mdev)
+ goto out_err;
+ /* The next three values are read in as bytes,
+ * but stored as 512-byte sector lengths
+ */
+ READ_SECTOR(be->be_f_offset);
+ READ_SECTOR(be->be_length);
+ READ_SECTOR(be->be_v_offset);
+ READ32(be->be_state);
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA)
+ be->be_inval = &bl->bl_inval;
+ if (verify_extent(be, &lv)) {
+ dprintk("%s verify failed\n", __func__);
+ goto out_err;
+ }
+ list_add_tail(&be->be_node, &extents);
+ }
+ if (p != end) {
+ dprintk("%s Undecoded cruft at end of opaque\n", __func__);
+ be = NULL;
+ goto out_err;
+ }
+ if (lgr->lseg.offset + lgr->lseg.length != lv.start << 9) {
+ dprintk("%s Final length mismatch\n", __func__);
+ be = NULL;
+ goto out_err;
+ }
+ if (lv.start < lv.cowread) {
+ dprintk("%s Final uncovered COW extent\n", __func__);
+ be = NULL;
+ goto out_err;
+ }
+ /* Extents decoded properly, now try to merge them in to
+ * existing layout extents.
+ */
+ /* STUB - instead we just throw them away */
+ status = 0;
+ goto out_err;
+ out:
+ dprintk("%s returns %i\n", __func__, status);
+ return status;
+
+ out_err:
+ put_extent(be);
+ while (!list_empty(&extents)) {
+ be = list_first_entry(&extents, struct pnfs_block_extent,
+ be_node);
+ list_del(&be->be_node);
+ put_extent(be);
+ }
+ goto out;
}
@@ -53,3 +53,15 @@ put_extent(struct pnfs_block_extent *be)
}
}
+struct pnfs_block_extent *alloc_extent(void)
+{
+ struct pnfs_block_extent *be;
+
+ be = kmalloc(sizeof(struct pnfs_block_extent), GFP_KERNEL);
+ if (!be)
+ return NULL;
+ INIT_LIST_HEAD(&be->be_node);
+ kref_init(&be->be_refcnt);
+ be->be_inval = NULL;
+ return be;
+}