diff mbox series

[1/2] block: move dif_prepare/dif_complete functions to block layer

Message ID 1532252998-2375-1-git-send-email-maxg@mellanox.com (mailing list archive)
State New, archived
Headers show
Series [1/2] block: move dif_prepare/dif_complete functions to block layer | expand

Commit Message

Max Gurtovoy July 22, 2018, 9:49 a.m. UTC
Currently these functions are implemented in the scsi layer, but their
actual place should be the block layer since T10-PI is a general data
integrity feature that is used in the nvme protocol as well.

Suggested-by: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---
 block/blk-integrity.c  | 111 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/sd.c      |  12 ++++--
 drivers/scsi/sd.h      |   9 ----
 drivers/scsi/sd_dif.c  | 113 -------------------------------------------------
 include/linux/blkdev.h |  14 ++++++
 5 files changed, 134 insertions(+), 125 deletions(-)

Comments

Christoph Hellwig July 23, 2018, 7:28 a.m. UTC | #1
>  #include <linux/blkdev.h>
> +#include <linux/t10-pi.h>

Sounds like the new functions should move to block/10-pi.c given
that they are specific to the T10 defined formats.

> +/*
> + * The virtual start sector is the one that was originally submitted
> + * by the block layer.	Due to partitioning, MD/DM cloning, etc. the
> + * actual physical start sector is likely to be different.  Remap
> + * protection information to match the physical LBA.
> + *
> + * From a protocol perspective there's a slight difference between
> + * Type 1 and 2.  The latter uses command's 32-byte exclusively, and the
> + * reference tag is seeded in the command.  This gives us the potential to
> + * avoid virt->phys remapping during write.  However, at read time we
> + * don't know whether the virt sector is the same as when we wrote it
> + * (we could be reading from real disk as opposed to MD/DM device.  So
> + * we always remap Type 2 making it identical to Type 1.
> + *
> + * Type 3 does not have a reference tag so no remapping is required.
> + */

Maybe add proper kerneldoc comments given that these are exported
functions?

The 32-byte CDB comment doesn't really make sense here as it is SCSI
specific, so we'll need to drop it or find a good place for it in the
SCSI code.

> +void blk_integrity_dif_prepare(struct request *rq, u8 protection_type,
> +			       u32 ref_tag)
> +{

Maybe call this blk_t10_pi_prepare?

> +	const int tuple_sz = sizeof(struct t10_pi_tuple);
> +	struct bio *bio;
> +	struct t10_pi_tuple *pi;
> +	u32 phys, virt;
> +
> +	if (protection_type == T10_PI_TYPE3_PROTECTION)
> +		return;
> +
> +	phys = ref_tag;

Seems like we could just use the ref_tag variable later instead of
duplicating it.

> +
> +	__rq_for_each_bio(bio, rq) {
> +		struct bio_integrity_payload *bip = bio_integrity(bio);
> +		struct bio_vec iv;
> +		struct bvec_iter iter;
> +		unsigned int j;
> +
> +		/* Already remapped? */
> +		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
> +			break;
> +
> +		virt = bip_get_seed(bip) & 0xffffffff;

Looks like we could keep the virt variable inside the loop and assign
it where declared, e.g.:

		u32 virt = bip_get_seed(bip) & 0xffffffff;

at the beginning of this block.

> +		bip_for_each_vec(iv, bip, iter) {
> +			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;

Pi can have local scope here, too.

> +			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
> +
> +				if (be32_to_cpu(pi->ref_tag) == virt)
> +					pi->ref_tag = cpu_to_be32(phys);
> +
> +				virt++;
> +				phys++;
> +			}

No need for the empty lines inside this loop.

> +/*
> + * Remap physical sector values in the reference tag to the virtual
> + * values expected by the block layer.
> + */
> +void blk_integrity_dif_complete(struct request *rq, u8 protection_type,
> +				u32 ref_tag, unsigned int intervals)

And pretty much all the  comments apply to this function as well.

> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 9421d9877730..4186bf027c59 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -1119,7 +1119,9 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
>  		SCpnt->cmnd[0] = WRITE_6;
>  
>  		if (blk_integrity_rq(rq))
> -			sd_dif_prepare(SCpnt);
> +			blk_integrity_dif_prepare(SCpnt->request,
> +						  sdkp->protection_type,
> +						  scsi_prot_ref_tag(SCpnt));

scsi_prot_ref_tag could be move to the block layer as it only uses
the sector in the eequest and the sector size, which we can get
from the gendisk as well.  We then don't need to pass it to the function.

We could also move the protection type to the gendisk, although I'm not
sure it's going to be worth it.
Keith Busch July 23, 2018, 2:02 p.m. UTC | #2
On Sun, Jul 22, 2018 at 12:49:57PM +0300, Max Gurtovoy wrote:
> +void blk_integrity_dif_prepare(struct request *rq, u8 protection_type,
> +			       u32 ref_tag)
> +{
> +	const int tuple_sz = sizeof(struct t10_pi_tuple);
> +	struct bio *bio;
> +	struct t10_pi_tuple *pi;
> +	u32 phys, virt;
> +
> +	if (protection_type == T10_PI_TYPE3_PROTECTION)
> +		return;
> +
> +	phys = ref_tag;
> +
> +	__rq_for_each_bio(bio, rq) {
> +		struct bio_integrity_payload *bip = bio_integrity(bio);
> +		struct bio_vec iv;
> +		struct bvec_iter iter;
> +		unsigned int j;
> +
> +		/* Already remapped? */
> +		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
> +			break;
> +
> +		virt = bip_get_seed(bip) & 0xffffffff;
> +
> +		bip_for_each_vec(iv, bip, iter) {
> +			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
> +
> +			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {

nvme's data integrity buffer can actually have more space between each
PI field, so we just need to account for that when iterating instead of
assuming each element is the size of a T10 PI tuple.

Otherwise, great idea.
Martin K. Petersen July 24, 2018, 1:54 a.m. UTC | #3
Christoph,

>> +void blk_integrity_dif_prepare(struct request *rq, u8 protection_type,
>> +			       u32 ref_tag)
>> +{
>
> Maybe call this blk_t10_pi_prepare?

The rest of these functions have a blk_integrity_ prefix. So either
stick with that or put the functions in t10-pi.c and use a t10_pi_
prefix.

I'm a bit torn on placement since the integrity metadata could contain
other stuff than T10 PI. But the remapping is very specific to T10 PI.

>> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
>> index 9421d9877730..4186bf027c59 100644
>> --- a/drivers/scsi/sd.c
>> +++ b/drivers/scsi/sd.c
>> @@ -1119,7 +1119,9 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
>>  		SCpnt->cmnd[0] = WRITE_6;
>>  
>>  		if (blk_integrity_rq(rq))
>> -			sd_dif_prepare(SCpnt);
>> +			blk_integrity_dif_prepare(SCpnt->request,
>> +						  sdkp->protection_type,
>> +						  scsi_prot_ref_tag(SCpnt));
>
> scsi_prot_ref_tag could be move to the block layer as it only uses
> the sector in the eequest and the sector size, which we can get
> from the gendisk as well.  We then don't need to pass it to the function.

For Type 2, the PI can be at intervals different from the logical block
size (although we don't support that yet). We should use the
blk_integrity profile interval instead of assuming sector size.

And wrt. Keith's comment: The tuple_size should be the one from the
integrity profile as well, not sizeof(struct t10_pi_tuple).
Christoph Hellwig July 24, 2018, 7:57 a.m. UTC | #4
On Mon, Jul 23, 2018 at 09:54:38PM -0400, Martin K. Petersen wrote:
> 
> Christoph,
> 
> >> +void blk_integrity_dif_prepare(struct request *rq, u8 protection_type,
> >> +			       u32 ref_tag)
> >> +{
> >
> > Maybe call this blk_t10_pi_prepare?
> 
> The rest of these functions have a blk_integrity_ prefix. So either
> stick with that or put the functions in t10-pi.c and use a t10_pi_
> prefix.

Yes, I suggested moving it somewhere in my reply.
Max Gurtovoy July 24, 2018, 12:01 p.m. UTC | #5
On 7/24/2018 4:54 AM, Martin K. Petersen wrote:
> 
> Christoph,
> 
>>> +void blk_integrity_dif_prepare(struct request *rq, u8 protection_type,
>>> +			       u32 ref_tag)
>>> +{
>>
>> Maybe call this blk_t10_pi_prepare?
> 
> The rest of these functions have a blk_integrity_ prefix. So either
> stick with that or put the functions in t10-pi.c and use a t10_pi_
> prefix.
> 
> I'm a bit torn on placement since the integrity metadata could contain
> other stuff than T10 PI. But the remapping is very specific to T10 PI.
> 
>>> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
>>> index 9421d9877730..4186bf027c59 100644
>>> --- a/drivers/scsi/sd.c
>>> +++ b/drivers/scsi/sd.c
>>> @@ -1119,7 +1119,9 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
>>>   		SCpnt->cmnd[0] = WRITE_6;
>>>   
>>>   		if (blk_integrity_rq(rq))
>>> -			sd_dif_prepare(SCpnt);
>>> +			blk_integrity_dif_prepare(SCpnt->request,
>>> +						  sdkp->protection_type,
>>> +						  scsi_prot_ref_tag(SCpnt));
>>
>> scsi_prot_ref_tag could be move to the block layer as it only uses
>> the sector in the eequest and the sector size, which we can get
>> from the gendisk as well.  We then don't need to pass it to the function.
> 
> For Type 2, the PI can be at intervals different from the logical block
> size (although we don't support that yet). We should use the
> blk_integrity profile interval instead of assuming sector size.
> 
> And wrt. Keith's comment: The tuple_size should be the one from the
> integrity profile as well, not sizeof(struct t10_pi_tuple).
> 

Ok so I'll use rq->q->integrity.tuple_size as the tuple_sz and increment 
j and pi according to it, right ?
diff mbox series

Patch

diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 6121611e1316..66b095a866d3 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -21,6 +21,7 @@ 
  */
 
 #include <linux/blkdev.h>
+#include <linux/t10-pi.h>
 #include <linux/backing-dev.h>
 #include <linux/mempool.h>
 #include <linux/bio.h>
@@ -451,3 +452,113 @@  void blk_integrity_del(struct gendisk *disk)
 	kobject_del(&disk->integrity_kobj);
 	kobject_put(&disk->integrity_kobj);
 }
+
+/*
+ * The virtual start sector is the one that was originally submitted
+ * by the block layer.	Due to partitioning, MD/DM cloning, etc. the
+ * actual physical start sector is likely to be different.  Remap
+ * protection information to match the physical LBA.
+ *
+ * From a protocol perspective there's a slight difference between
+ * Type 1 and 2.  The latter uses command's 32-byte exclusively, and the
+ * reference tag is seeded in the command.  This gives us the potential to
+ * avoid virt->phys remapping during write.  However, at read time we
+ * don't know whether the virt sector is the same as when we wrote it
+ * (we could be reading from real disk as opposed to MD/DM device.  So
+ * we always remap Type 2 making it identical to Type 1.
+ *
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+void blk_integrity_dif_prepare(struct request *rq, u8 protection_type,
+			       u32 ref_tag)
+{
+	const int tuple_sz = sizeof(struct t10_pi_tuple);
+	struct bio *bio;
+	struct t10_pi_tuple *pi;
+	u32 phys, virt;
+
+	if (protection_type == T10_PI_TYPE3_PROTECTION)
+		return;
+
+	phys = ref_tag;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		struct bio_vec iv;
+		struct bvec_iter iter;
+		unsigned int j;
+
+		/* Already remapped? */
+		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
+			break;
+
+		virt = bip_get_seed(bip) & 0xffffffff;
+
+		bip_for_each_vec(iv, bip, iter) {
+			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
+
+			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
+
+				if (be32_to_cpu(pi->ref_tag) == virt)
+					pi->ref_tag = cpu_to_be32(phys);
+
+				virt++;
+				phys++;
+			}
+
+			kunmap_atomic(pi);
+		}
+
+		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
+	}
+}
+EXPORT_SYMBOL(blk_integrity_dif_prepare);
+
+/*
+ * Remap physical sector values in the reference tag to the virtual
+ * values expected by the block layer.
+ */
+void blk_integrity_dif_complete(struct request *rq, u8 protection_type,
+				u32 ref_tag, unsigned int intervals)
+{
+	const int tuple_sz = sizeof(struct t10_pi_tuple);
+	struct bio *bio;
+	struct t10_pi_tuple *pi;
+	unsigned int j;
+	u32 phys, virt;
+
+	if (protection_type == T10_PI_TYPE3_PROTECTION)
+		return;
+
+	phys = ref_tag;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		virt = bip_get_seed(bip) & 0xffffffff;
+
+		bip_for_each_vec(iv, bip, iter) {
+			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
+
+			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
+
+				if (intervals == 0) {
+					kunmap_atomic(pi);
+					return;
+				}
+
+				if (be32_to_cpu(pi->ref_tag) == phys)
+					pi->ref_tag = cpu_to_be32(virt);
+
+				virt++;
+				phys++;
+				intervals--;
+			}
+
+			kunmap_atomic(pi);
+		}
+	}
+}
+EXPORT_SYMBOL(blk_integrity_dif_complete);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 9421d9877730..4186bf027c59 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1119,7 +1119,9 @@  static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 		SCpnt->cmnd[0] = WRITE_6;
 
 		if (blk_integrity_rq(rq))
-			sd_dif_prepare(SCpnt);
+			blk_integrity_dif_prepare(SCpnt->request,
+						  sdkp->protection_type,
+						  scsi_prot_ref_tag(SCpnt));
 
 	} else if (rq_data_dir(rq) == READ) {
 		SCpnt->cmnd[0] = READ_6;
@@ -2047,8 +2049,12 @@  static int sd_done(struct scsi_cmnd *SCpnt)
 					   "sd_done: completed %d of %d bytes\n",
 					   good_bytes, scsi_bufflen(SCpnt)));
 
-	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt))
-		sd_dif_complete(SCpnt, good_bytes);
+	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt) &&
+	    good_bytes)
+		blk_integrity_dif_complete(SCpnt->request,
+				sdkp->protection_type,
+				scsi_prot_ref_tag(SCpnt),
+				good_bytes / scsi_prot_interval(SCpnt));
 
 	return good_bytes;
 }
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 392c7d078ae3..a7d4f50b67d4 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -254,21 +254,12 @@  static inline unsigned int sd_prot_flag_mask(unsigned int prot_op)
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 
 extern void sd_dif_config_host(struct scsi_disk *);
-extern void sd_dif_prepare(struct scsi_cmnd *scmd);
-extern void sd_dif_complete(struct scsi_cmnd *, unsigned int);
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
 static inline void sd_dif_config_host(struct scsi_disk *disk)
 {
 }
-static inline int sd_dif_prepare(struct scsi_cmnd *scmd)
-{
-	return 0;
-}
-static inline void sd_dif_complete(struct scsi_cmnd *cmd, unsigned int a)
-{
-}
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 9035380c0dda..db72c82486e3 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -95,116 +95,3 @@  void sd_dif_config_host(struct scsi_disk *sdkp)
 	blk_integrity_register(disk, &bi);
 }
 
-/*
- * The virtual start sector is the one that was originally submitted
- * by the block layer.	Due to partitioning, MD/DM cloning, etc. the
- * actual physical start sector is likely to be different.  Remap
- * protection information to match the physical LBA.
- *
- * From a protocol perspective there's a slight difference between
- * Type 1 and 2.  The latter uses 32-byte CDBs exclusively, and the
- * reference tag is seeded in the CDB.  This gives us the potential to
- * avoid virt->phys remapping during write.  However, at read time we
- * don't know whether the virt sector is the same as when we wrote it
- * (we could be reading from real disk as opposed to MD/DM device.  So
- * we always remap Type 2 making it identical to Type 1.
- *
- * Type 3 does not have a reference tag so no remapping is required.
- */
-void sd_dif_prepare(struct scsi_cmnd *scmd)
-{
-	const int tuple_sz = sizeof(struct t10_pi_tuple);
-	struct bio *bio;
-	struct scsi_disk *sdkp;
-	struct t10_pi_tuple *pi;
-	u32 phys, virt;
-
-	sdkp = scsi_disk(scmd->request->rq_disk);
-
-	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION)
-		return;
-
-	phys = scsi_prot_ref_tag(scmd);
-
-	__rq_for_each_bio(bio, scmd->request) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		struct bio_vec iv;
-		struct bvec_iter iter;
-		unsigned int j;
-
-		/* Already remapped? */
-		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
-			break;
-
-		virt = bip_get_seed(bip) & 0xffffffff;
-
-		bip_for_each_vec(iv, bip, iter) {
-			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
-
-			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
-
-				if (be32_to_cpu(pi->ref_tag) == virt)
-					pi->ref_tag = cpu_to_be32(phys);
-
-				virt++;
-				phys++;
-			}
-
-			kunmap_atomic(pi);
-		}
-
-		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
-	}
-}
-
-/*
- * Remap physical sector values in the reference tag to the virtual
- * values expected by the block layer.
- */
-void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
-{
-	const int tuple_sz = sizeof(struct t10_pi_tuple);
-	struct scsi_disk *sdkp;
-	struct bio *bio;
-	struct t10_pi_tuple *pi;
-	unsigned int j, intervals;
-	u32 phys, virt;
-
-	sdkp = scsi_disk(scmd->request->rq_disk);
-
-	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION || good_bytes == 0)
-		return;
-
-	intervals = good_bytes / scsi_prot_interval(scmd);
-	phys = scsi_prot_ref_tag(scmd);
-
-	__rq_for_each_bio(bio, scmd->request) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		struct bio_vec iv;
-		struct bvec_iter iter;
-
-		virt = bip_get_seed(bip) & 0xffffffff;
-
-		bip_for_each_vec(iv, bip, iter) {
-			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
-
-			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
-
-				if (intervals == 0) {
-					kunmap_atomic(pi);
-					return;
-				}
-
-				if (be32_to_cpu(pi->ref_tag) == phys)
-					pi->ref_tag = cpu_to_be32(virt);
-
-				virt++;
-				phys++;
-				intervals--;
-			}
-
-			kunmap_atomic(pi);
-		}
-	}
-}
-
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 79226ca8f80f..18f3ca17d4f4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1823,6 +1823,10 @@  extern bool blk_integrity_merge_rq(struct request_queue *, struct request *,
 				   struct request *);
 extern bool blk_integrity_merge_bio(struct request_queue *, struct request *,
 				    struct bio *);
+extern void blk_integrity_dif_prepare(struct request *rq, u8 protection_type,
+			       u32 ref_tag);
+extern void blk_integrity_dif_complete(struct request *rq, u8 protection_type,
+				u32 ref_tag, unsigned int intervals);
 
 static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
 {
@@ -1950,6 +1954,16 @@  static inline bool integrity_req_gap_front_merge(struct request *req,
 	return false;
 }
 
+void blk_integrity_dif_prepare(struct request *rq, u8 protection_type,
+			       u32 ref_tag)
+{
+}
+
+void blk_integrity_dif_complete(struct request *rq, u8 protection_type,
+				u32 ref_tag, unsigned int intervals)
+{
+}
+
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
 struct block_device_operations {