Message ID | 20180808193140.1463-1-gedwards@ddn.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | target/iblock: split T10 PI SGL across command bios | expand |
On 08/08/2018 02:31 PM, Greg Edwards wrote: > When T10 PI is enabled on a backing device for the iblock backstore, the > PI SGL for the entire command is attached to the first bio only. This > works fine if the command is covered by a single bio, but results in > integrity verification errors for the other bios in a multi-bio command. > Did you hit this with a older distro kernel? It looks like iblock_get_bio will alloc a bio that has enough vecs for the entire cmd (bi_max_vecs will equal sgl_nents). So it is not clear to me how does the bio_add_page call ever return a value other than sg->length, and we end up doing another iblock_get_bio call? > Split the PI SGL across the bios in the command, so each bip contains > the protection information for the data in the bio. In a multi-bio > command, store where we left off in the PI SGL so we know where to start > with the next bio. > > Signed-off-by: Greg Edwards <gedwards@ddn.com> > --- > This patch depends on commit 359f642700f2 ("block: move > bio_integrity_{intervals,bytes} into blkdev.h") in Jens' block for-next branch. > > drivers/target/target_core_iblock.c | 60 +++++++++++++++++++++-------- > 1 file changed, 44 insertions(+), 16 deletions(-) > > diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c > index ce1321a5cb7b..d3ab83282f61 100644 > --- a/drivers/target/target_core_iblock.c > +++ b/drivers/target/target_core_iblock.c > @@ -635,7 +635,8 @@ static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b) > } > > static int > -iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio) > +iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio, > + struct scatterlist **sgl, unsigned int *skip) > { > struct se_device *dev = cmd->se_dev; > struct blk_integrity *bi; > @@ -643,6 +644,7 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio) > struct iblock_dev *ib_dev = IBLOCK_DEV(dev); > struct scatterlist *sg; > int i, rc; > + unsigned int size, nr_pages, len, off; > > bi = bdev_get_integrity(ib_dev->ibd_bd); > if (!bi) { > @@ -650,32 +652,52 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio) > return -ENODEV; > } > > - bip = bio_integrity_alloc(bio, GFP_NOIO, cmd->t_prot_nents); > + nr_pages = min_t(unsigned int, cmd->t_prot_nents, BIO_MAX_PAGES); > + bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages); > if (IS_ERR(bip)) { > pr_err("Unable to allocate bio_integrity_payload\n"); > return PTR_ERR(bip); > } > > - bip->bip_iter.bi_size = (cmd->data_length / dev->dev_attrib.block_size) * > - dev->prot_length; > - bip->bip_iter.bi_sector = bio->bi_iter.bi_sector; > + bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio)); > + bip_set_seed(bip, bio->bi_iter.bi_sector); > > pr_debug("IBLOCK BIP Size: %u Sector: %llu\n", bip->bip_iter.bi_size, > (unsigned long long)bip->bip_iter.bi_sector); > > - for_each_sg(cmd->t_prot_sg, sg, cmd->t_prot_nents, i) { > + size = bip->bip_iter.bi_size; > + for_each_sg(*sgl, sg, nr_pages, i) { > > - rc = bio_integrity_add_page(bio, sg_page(sg), sg->length, > - sg->offset); > - if (rc != sg->length) { > + len = sg->length - *skip; > + off = sg->offset + *skip; > + > + if (*skip) > + *skip = 0; > + > + if (len > size) { > + len = size; > + *skip = len; > + } > + > + rc = bio_integrity_add_page(bio, sg_page(sg), len, off); > + if (rc != len) { > pr_err("bio_integrity_add_page() failed; %d\n", rc); > return -ENOMEM; > } > > pr_debug("Added bio integrity page: %p length: %d offset; %d\n", > - sg_page(sg), sg->length, sg->offset); > + sg_page(sg), len, off); > + > + size -= len; > + if (!size) > + break; > } > > + if (*skip == 0) > + *sgl = sg_next(sg); > + else > + *sgl = sg; > + > return 0; > } > > @@ -686,12 +708,12 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, > struct se_device *dev = cmd->se_dev; > sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba); > struct iblock_req *ibr; > - struct bio *bio, *bio_start; > + struct bio *bio; > struct bio_list list; > - struct scatterlist *sg; > + struct scatterlist *sg, *sg_prot = cmd->t_prot_sg; > u32 sg_num = sgl_nents; > - unsigned bio_cnt; > - int i, op, op_flags = 0; > + unsigned int bio_cnt, skip = 0; > + int i, rc, op, op_flags = 0; > > if (data_direction == DMA_TO_DEVICE) { > struct iblock_dev *ib_dev = IBLOCK_DEV(dev); > @@ -726,7 +748,6 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, > if (!bio) > goto fail_free_ibr; > > - bio_start = bio; > bio_list_init(&list); > bio_list_add(&list, bio); > > @@ -741,6 +762,13 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, > */ > while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) > != sg->length) { > + if (cmd->prot_type && dev->dev_attrib.pi_prot_type) { > + rc = iblock_alloc_bip(cmd, bio, &sg_prot, > + &skip); > + if (rc) > + goto fail_put_bios; > + } > + > if (bio_cnt >= IBLOCK_MAX_BIO_PER_TASK) { > iblock_submit_bios(&list); > bio_cnt = 0; > @@ -762,7 +790,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, > } > > if (cmd->prot_type && dev->dev_attrib.pi_prot_type) { > - int rc = iblock_alloc_bip(cmd, bio_start); > + rc = iblock_alloc_bip(cmd, bio, &sg_prot, &skip); > if (rc) > goto fail_put_bios; > } >
On Tue, Aug 21, 2018 at 04:07:08PM -0500, Mike Christie wrote: > On 08/08/2018 02:31 PM, Greg Edwards wrote: >> When T10 PI is enabled on a backing device for the iblock backstore, the >> PI SGL for the entire command is attached to the first bio only. This >> works fine if the command is covered by a single bio, but results in >> integrity verification errors for the other bios in a multi-bio command. >> > > Did you hit this with a older distro kernel? > > It looks like iblock_get_bio will alloc a bio that has enough vecs for > the entire cmd (bi_max_vecs will equal sgl_nents). So it is not clear to > me how does the bio_add_page call ever return a value other than > sg->length, and we end up doing another iblock_get_bio call? I hit it with the tip of Linus' tree, but it depended on some other in-flight changes. Those other changes are now in Linus' tree for 4.19, with the exception of [1]. Without [1], when doing a large read I/O through vhost + iblock to a T10 PI enabled device (I used scsi_debug), you first hit the vhost VHOST_SCSI_PREALLOC_PROT_SGLS limitation noted in [1]. Once the limitation on I/O size is no longer gated by VHOST_SCSI_PREALLOC_PROT_SGLS, the next issue I hit is the one this patch addresses. I should have been more precise in my commit message. The failure is actually a bio_integrity_alloc() failure to allocate the bip_vec when cmd->t_prot_nents exceeds 256 (BIO_MAX_PAGES), which results in the following failure on the host: [ 53.780723] Unable to allocate bio_integrity_payload and the following failure on the client: [ 28.724432] sd 0:0:1:0: [sda] tag#40 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 28.736127] sd 0:0:1:0: [sda] tag#40 Sense Key : Not Ready [current] [ 28.744567] sd 0:0:1:0: [sda] tag#40 Add. Sense: Logical unit communication failure [ 28.754724] sd 0:0:1:0: [sda] tag#40 CDB: Read(10) 28 20 00 00 00 00 00 38 00 00 [ 28.766190] print_req_error: I/O error, dev sda, sector 0 By splitting up the PI SGL across the bios, you avoid ever trying to allocate a too-large bip_vec (I was testing with 32 MiB I/Os). Here is how I am testing: L1 VM: # modprobe scsi_debug dif=1 dix=1 guard=0 dev_size_mb=6144 # targetcli <<EOF /backstores/block create dev=/dev/sda name=scsi_debug /vhost create wwn=naa.50014055d17a5a87 /vhost/naa.50014055d17a5a87/tpg1/luns/ create /backstores/block/scsi_debug EOF L2 VM is booted with QEMU vhost option 't10_pi=on', which depends on QEMU patches [2]: -device vhost-scsi-pci,wwpn=naa.50014055d17a5a87,t10_pi=on \ then in L2 VM: # cat /sys/block/sda/queue/max_hw_sectors_kb > /sys/block/sda/queue/max_sectors_kb # dd if=/dev/sda of=/dev/null bs=32M iflag=direct count=1 The end goal being to have a vehicle to test large I/Os through virtio_scsi to a PI enabled device. Greg [1] https://lists.linuxfoundation.org/pipermail/virtualization/2018-August/039040.html [2] https://lists.nongnu.org/archive/html/qemu-devel/2018-08/msg01298.html
On Tue, Aug 21, 2018 at 04:13:57PM -0600, Greg Edwards wrote: > On Tue, Aug 21, 2018 at 04:07:08PM -0500, Mike Christie wrote: >> On 08/08/2018 02:31 PM, Greg Edwards wrote: >>> When T10 PI is enabled on a backing device for the iblock backstore, the >>> PI SGL for the entire command is attached to the first bio only. This >>> works fine if the command is covered by a single bio, but results in >>> integrity verification errors for the other bios in a multi-bio command. >>> >> >> Did you hit this with a older distro kernel? >> >> It looks like iblock_get_bio will alloc a bio that has enough vecs for >> the entire cmd (bi_max_vecs will equal sgl_nents). So it is not clear to >> me how does the bio_add_page call ever return a value other than >> sg->length, and we end up doing another iblock_get_bio call? > > I hit it with the tip of Linus' tree, but it depended on some other > in-flight changes. Those other changes are now in Linus' tree for 4.19, > with the exception of [1]. > > Without [1], when doing a large read I/O through vhost + iblock to a T10 > PI enabled device (I used scsi_debug), you first hit the vhost > VHOST_SCSI_PREALLOC_PROT_SGLS limitation noted in [1]. > > Once the limitation on I/O size is no longer gated by > VHOST_SCSI_PREALLOC_PROT_SGLS, the next issue I hit is the one this > patch addresses. I should have been more precise in my commit message. > The failure is actually a bio_integrity_alloc() failure to allocate the > bip_vec when cmd->t_prot_nents exceeds 256 (BIO_MAX_PAGES), which > results in the following failure on the host: > > [ 53.780723] Unable to allocate bio_integrity_payload Hi Mike, Hold off on looking at this patch for the moment. I took another look at it this morning, and I think I've found a vhost-scsi bug that is contributing to this (and changing VHOST_SCSI_PREALLOC_PROT_SGLS just papers around it). It looks like vhost-scsi should be capping the prot_iter in vhost_scsi_handle_vq(), something like: diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 76f8d649147b..1302869c506b 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -973,6 +973,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (prot_bytes) { exp_data_len -= prot_bytes; prot_iter = data_iter; + iov_iter_truncate(&prot_iter, prot_bytes); iov_iter_advance(&data_iter, prot_bytes); } tag = vhost64_to_cpu(vq, v_req_pi.tag); Greg
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index ce1321a5cb7b..d3ab83282f61 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -635,7 +635,8 @@ static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b) } static int -iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio) +iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio, + struct scatterlist **sgl, unsigned int *skip) { struct se_device *dev = cmd->se_dev; struct blk_integrity *bi; @@ -643,6 +644,7 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio) struct iblock_dev *ib_dev = IBLOCK_DEV(dev); struct scatterlist *sg; int i, rc; + unsigned int size, nr_pages, len, off; bi = bdev_get_integrity(ib_dev->ibd_bd); if (!bi) { @@ -650,32 +652,52 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio) return -ENODEV; } - bip = bio_integrity_alloc(bio, GFP_NOIO, cmd->t_prot_nents); + nr_pages = min_t(unsigned int, cmd->t_prot_nents, BIO_MAX_PAGES); + bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages); if (IS_ERR(bip)) { pr_err("Unable to allocate bio_integrity_payload\n"); return PTR_ERR(bip); } - bip->bip_iter.bi_size = (cmd->data_length / dev->dev_attrib.block_size) * - dev->prot_length; - bip->bip_iter.bi_sector = bio->bi_iter.bi_sector; + bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio)); + bip_set_seed(bip, bio->bi_iter.bi_sector); pr_debug("IBLOCK BIP Size: %u Sector: %llu\n", bip->bip_iter.bi_size, (unsigned long long)bip->bip_iter.bi_sector); - for_each_sg(cmd->t_prot_sg, sg, cmd->t_prot_nents, i) { + size = bip->bip_iter.bi_size; + for_each_sg(*sgl, sg, nr_pages, i) { - rc = bio_integrity_add_page(bio, sg_page(sg), sg->length, - sg->offset); - if (rc != sg->length) { + len = sg->length - *skip; + off = sg->offset + *skip; + + if (*skip) + *skip = 0; + + if (len > size) { + len = size; + *skip = len; + } + + rc = bio_integrity_add_page(bio, sg_page(sg), len, off); + if (rc != len) { pr_err("bio_integrity_add_page() failed; %d\n", rc); return -ENOMEM; } pr_debug("Added bio integrity page: %p length: %d offset; %d\n", - sg_page(sg), sg->length, sg->offset); + sg_page(sg), len, off); + + size -= len; + if (!size) + break; } + if (*skip == 0) + *sgl = sg_next(sg); + else + *sgl = sg; + return 0; } @@ -686,12 +708,12 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, struct se_device *dev = cmd->se_dev; sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba); struct iblock_req *ibr; - struct bio *bio, *bio_start; + struct bio *bio; struct bio_list list; - struct scatterlist *sg; + struct scatterlist *sg, *sg_prot = cmd->t_prot_sg; u32 sg_num = sgl_nents; - unsigned bio_cnt; - int i, op, op_flags = 0; + unsigned int bio_cnt, skip = 0; + int i, rc, op, op_flags = 0; if (data_direction == DMA_TO_DEVICE) { struct iblock_dev *ib_dev = IBLOCK_DEV(dev); @@ -726,7 +748,6 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (!bio) goto fail_free_ibr; - bio_start = bio; bio_list_init(&list); bio_list_add(&list, bio); @@ -741,6 +762,13 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, */ while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) != sg->length) { + if (cmd->prot_type && dev->dev_attrib.pi_prot_type) { + rc = iblock_alloc_bip(cmd, bio, &sg_prot, + &skip); + if (rc) + goto fail_put_bios; + } + if (bio_cnt >= IBLOCK_MAX_BIO_PER_TASK) { iblock_submit_bios(&list); bio_cnt = 0; @@ -762,7 +790,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, } if (cmd->prot_type && dev->dev_attrib.pi_prot_type) { - int rc = iblock_alloc_bip(cmd, bio_start); + rc = iblock_alloc_bip(cmd, bio, &sg_prot, &skip); if (rc) goto fail_put_bios; }
When T10 PI is enabled on a backing device for the iblock backstore, the PI SGL for the entire command is attached to the first bio only. This works fine if the command is covered by a single bio, but results in integrity verification errors for the other bios in a multi-bio command. Split the PI SGL across the bios in the command, so each bip contains the protection information for the data in the bio. In a multi-bio command, store where we left off in the PI SGL so we know where to start with the next bio. Signed-off-by: Greg Edwards <gedwards@ddn.com> --- This patch depends on commit 359f642700f2 ("block: move bio_integrity_{intervals,bytes} into blkdev.h") in Jens' block for-next branch. drivers/target/target_core_iblock.c | 60 +++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 16 deletions(-)