diff mbox

[v2.1] sd: Micro-optimize READ / WRITE CDB encoding

Message ID 20171025035222.6272-1-dgilbert@interlog.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

Douglas Gilbert Oct. 25, 2017, 3:52 a.m. UTC
The sd_setup_read_write_cmnd() function is on the "fast path" for
block system access to SCSI devices (logical units). Rewrite this
function to improve speed and readability:
 - use put_unaligned_be family of functions to save lots of shifts
 - improve the scaling code when sector_size > 512 bytes
 - use variable names containing "sect" for block system quantities
   which have implicit 512 byte sector size. Use "lba" and
   "num_blks" after optional scaling to match the logical block
   address and number of logical blocks of the SCSI device being
   accessed
 - use local variables to hold values that were previously calculated
   more than once

Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
---
 drivers/scsi/sd.c | 216 ++++++++++++++++++++++++------------------------------
 1 file changed, 94 insertions(+), 122 deletions(-)

Comments

Hannes Reinecke Oct. 25, 2017, 7:28 a.m. UTC | #1
On 10/25/2017 05:52 AM, Douglas Gilbert wrote:
> The sd_setup_read_write_cmnd() function is on the "fast path" for
> block system access to SCSI devices (logical units). Rewrite this
> function to improve speed and readability:
>  - use put_unaligned_be family of functions to save lots of shifts
>  - improve the scaling code when sector_size > 512 bytes
>  - use variable names containing "sect" for block system quantities
>    which have implicit 512 byte sector size. Use "lba" and
>    "num_blks" after optional scaling to match the logical block
>    address and number of logical blocks of the SCSI device being
>    accessed
>  - use local variables to hold values that were previously calculated
>    more than once
> 
> Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
> ---
>  drivers/scsi/sd.c | 216 ++++++++++++++++++++++++------------------------------
>  1 file changed, 94 insertions(+), 122 deletions(-)
> 
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index d175c5c5ccf8..618cb5d0f75a 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -1004,11 +1004,18 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
>  	struct scsi_device *sdp = SCpnt->device;
>  	struct gendisk *disk = rq->rq_disk;
>  	struct scsi_disk *sdkp = scsi_disk(disk);
> -	sector_t block = blk_rq_pos(rq);
> -	sector_t threshold;
> -	unsigned int this_count = blk_rq_sectors(rq);
> +	unsigned int num_sects = blk_rq_sectors(rq);
> +	unsigned int num_blks;
>  	unsigned int dif, dix;
> -	bool zoned_write = sd_is_zoned(sdkp) && rq_data_dir(rq) == WRITE;
> +	unsigned int sect_sz;
> +	sector_t sect_addr = blk_rq_pos(rq);
> +	sector_t sect_after = sect_addr + num_sects;
> +	sector_t total_sects = get_capacity(disk);
> +	sector_t threshold_sect;
> +	sector_t lba;
> +	bool is_write = (rq_data_dir(rq) == WRITE);
> +	bool have_fua = !!(rq->cmd_flags & REQ_FUA);
> +	bool zoned_write = sd_is_zoned(sdkp) && is_write;
>  	int ret;
>  	unsigned char protect;
>  
> @@ -1029,20 +1036,23 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
>  
>  	SCSI_LOG_HLQUEUE(1,
>  		scmd_printk(KERN_INFO, SCpnt,
> -			"%s: block=%llu, count=%d\n",
> -			__func__, (unsigned long long)block, this_count));
> +			"%s: sector=%llu, num_sects=%d\n",
> +			__func__, (unsigned long long)sect_addr, num_sects));
>  
> -	if (!sdp || !scsi_device_online(sdp) ||
> -	    block + blk_rq_sectors(rq) > get_capacity(disk)) {
> +	if (likely(sdp && scsi_device_online(sdp) &&
> +		   (sect_after <= total_sects)))
> +		; /* ok: have device, its online and access fits on medium */
> +	else {
>  		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
>  						"Finishing %u sectors\n",
> -						blk_rq_sectors(rq)));
> +						num_sects));
>  		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
>  						"Retry with 0x%p\n", SCpnt));
>  		goto out;
>  	}
> +	sect_sz = sdp->sector_size;
>  
> -	if (sdp->changed) {
> +	if (unlikely(sdp->changed)) {
>  		/*
>  		 * quietly refuse to do anything to a changed disc until 
>  		 * the changed bit has been reset
Please invert the 'if' condition to avoid the empty branch.

> @@ -1055,21 +1065,22 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
>  	 * Some SD card readers can't handle multi-sector accesses which touch
>  	 * the last one or two hardware sectors.  Split accesses as needed.
>  	 */
> -	threshold = get_capacity(disk) - SD_LAST_BUGGY_SECTORS *
> -		(sdp->sector_size / 512);
> -
> -	if (unlikely(sdp->last_sector_bug && block + this_count > threshold)) {
> -		if (block < threshold) {
> -			/* Access up to the threshold but not beyond */
> -			this_count = threshold - block;
> -		} else {
> -			/* Access only a single hardware sector */
> -			this_count = sdp->sector_size / 512;
> -		}
> +	if (unlikely(sdp->last_sector_bug)) {
> +		threshold_sect = total_sects -
> +			    (SD_LAST_BUGGY_SECTORS * (sect_sz / 512));
> +
> +		if (unlikely(sect_after > threshold_sect))
> +			num_sects = (sect_addr < threshold_sect) ?
> +						(threshold_sect - sect_addr) :
> +						(sect_sz / 512);
> +			/* If LBA less than threshold then access up to the
> +			 * threshold but not beyond; otherwise access only
> +			 * a single hardware sector.
> +			 */
>  	}
>  
> -	SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n",
> -					(unsigned long long)block));
> +	SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "num_sects=%llu\n",
> +					(unsigned long long)num_sects));
>  
>  	/*
>  	 * If we have a 1K hardware sectorsize, prevent access to single
Why did you change the log message?
I'd rather keep it as it were...

Cheers,

Hannes
Douglas Gilbert Oct. 25, 2017, 8:39 p.m. UTC | #2
On 2017-10-25 03:28 AM, Hannes Reinecke wrote:
> On 10/25/2017 05:52 AM, Douglas Gilbert wrote:
>> The sd_setup_read_write_cmnd() function is on the "fast path" for
>> block system access to SCSI devices (logical units). Rewrite this
>> function to improve speed and readability:
>>   - use put_unaligned_be family of functions to save lots of shifts
>>   - improve the scaling code when sector_size > 512 bytes
>>   - use variable names containing "sect" for block system quantities
>>     which have implicit 512 byte sector size. Use "lba" and
>>     "num_blks" after optional scaling to match the logical block
>>     address and number of logical blocks of the SCSI device being
>>     accessed
>>   - use local variables to hold values that were previously calculated
>>     more than once
>>
>> Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
>> ---
>>   drivers/scsi/sd.c | 216 ++++++++++++++++++++++++------------------------------
>>   1 file changed, 94 insertions(+), 122 deletions(-)
>>
>> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
>> index d175c5c5ccf8..618cb5d0f75a 100644
>> --- a/drivers/scsi/sd.c
>> +++ b/drivers/scsi/sd.c
>> @@ -1004,11 +1004,18 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
>>   	struct scsi_device *sdp = SCpnt->device;
>>   	struct gendisk *disk = rq->rq_disk;
>>   	struct scsi_disk *sdkp = scsi_disk(disk);
>> -	sector_t block = blk_rq_pos(rq);
>> -	sector_t threshold;
>> -	unsigned int this_count = blk_rq_sectors(rq);
>> +	unsigned int num_sects = blk_rq_sectors(rq);
>> +	unsigned int num_blks;
>>   	unsigned int dif, dix;
>> -	bool zoned_write = sd_is_zoned(sdkp) && rq_data_dir(rq) == WRITE;
>> +	unsigned int sect_sz;
>> +	sector_t sect_addr = blk_rq_pos(rq);
>> +	sector_t sect_after = sect_addr + num_sects;
>> +	sector_t total_sects = get_capacity(disk);
>> +	sector_t threshold_sect;
>> +	sector_t lba;
>> +	bool is_write = (rq_data_dir(rq) == WRITE);
>> +	bool have_fua = !!(rq->cmd_flags & REQ_FUA);
>> +	bool zoned_write = sd_is_zoned(sdkp) && is_write;
>>   	int ret;
>>   	unsigned char protect;
>>   
>> @@ -1029,20 +1036,23 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
>>   
>>   	SCSI_LOG_HLQUEUE(1,
>>   		scmd_printk(KERN_INFO, SCpnt,
>> -			"%s: block=%llu, count=%d\n",
>> -			__func__, (unsigned long long)block, this_count));
>> +			"%s: sector=%llu, num_sects=%d\n",
>> +			__func__, (unsigned long long)sect_addr, num_sects));
>>   
>> -	if (!sdp || !scsi_device_online(sdp) ||
>> -	    block + blk_rq_sectors(rq) > get_capacity(disk)) {
>> +	if (likely(sdp && scsi_device_online(sdp) &&
>> +		   (sect_after <= total_sects)))
>> +		; /* ok: have device, its online and access fits on medium */
>> +	else {
>>   		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
>>   						"Finishing %u sectors\n",
>> -						blk_rq_sectors(rq)));
>> +						num_sects));
>>   		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
>>   						"Retry with 0x%p\n", SCpnt));
>>   		goto out;
>>   	}
>> +	sect_sz = sdp->sector_size;
>>   
>> -	if (sdp->changed) {
>> +	if (unlikely(sdp->changed)) {
>>   		/*
>>   		 * quietly refuse to do anything to a changed disc until
>>   		 * the changed bit has been reset
> Please invert the 'if' condition to avoid the empty branch.

I'm guessing you meant the previous "if" (i.e. the one checking the device
is online and the access fits). Okay.

Are (likely) empty branches bad for performance if they have the slight
benefit of improving readability?

>> @@ -1055,21 +1065,22 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
>>   	 * Some SD card readers can't handle multi-sector accesses which touch
>>   	 * the last one or two hardware sectors.  Split accesses as needed.
>>   	 */
>> -	threshold = get_capacity(disk) - SD_LAST_BUGGY_SECTORS *
>> -		(sdp->sector_size / 512);
>> -
>> -	if (unlikely(sdp->last_sector_bug && block + this_count > threshold)) {
>> -		if (block < threshold) {
>> -			/* Access up to the threshold but not beyond */
>> -			this_count = threshold - block;
>> -		} else {
>> -			/* Access only a single hardware sector */
>> -			this_count = sdp->sector_size / 512;
>> -		}
>> +	if (unlikely(sdp->last_sector_bug)) {
>> +		threshold_sect = total_sects -
>> +			    (SD_LAST_BUGGY_SECTORS * (sect_sz / 512));
>> +
>> +		if (unlikely(sect_after > threshold_sect))
>> +			num_sects = (sect_addr < threshold_sect) ?
>> +						(threshold_sect - sect_addr) :
>> +						(sect_sz / 512);
>> +			/* If LBA less than threshold then access up to the
>> +			 * threshold but not beyond; otherwise access only
>> +			 * a single hardware sector.
>> +			 */
>>   	}
>>   
>> -	SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n",
>> -					(unsigned long long)block));
>> +	SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "num_sects=%llu\n",
>> +					(unsigned long long)num_sects));
>>   
>>   	/*
>>   	 * If we have a 1K hardware sectorsize, prevent access to single
> Why did you change the log message?
> I'd rather keep it as it were...

Yes, I did change it ... and that was probably a mistake. Actually that
whole logging statement is a useless repetition IMO.

The original code had this near the top of sd_setup_rw_cmd():

         SCSI_LOG_HLQUEUE(1,
                 scmd_printk(KERN_INFO, SCpnt,
                         "%s: block=%llu, count=%d\n",
                         __func__, (unsigned long long)block, this_count));

plus the log statement you are commenting on, in the middle, both
at main scope:

         SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n",
                                         (unsigned long long)block));

That is before the scaling, so 'block' is the same in both cases.
So why the second log statement? It follows a condition that might
trim num_sects due to the the "last_sector_bug". But if it was
to highlight that trimming, then it should be inside that condition
and output num_sects (or this_count in the original code), not
the sector address, again.


It was reasonably obvious looking at this function that quite a few
people have edited it to add one feature or another. They may have
produced nice clean unified diffs that expedite approvals. However
they have muddied the code and made it difficult to follow and
in some cases less efficient that it should be.

New patch coming (after testing).

Thanks for your comments
Doug Gilbert
diff mbox

Patch

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index d175c5c5ccf8..618cb5d0f75a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1004,11 +1004,18 @@  static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 	struct scsi_device *sdp = SCpnt->device;
 	struct gendisk *disk = rq->rq_disk;
 	struct scsi_disk *sdkp = scsi_disk(disk);
-	sector_t block = blk_rq_pos(rq);
-	sector_t threshold;
-	unsigned int this_count = blk_rq_sectors(rq);
+	unsigned int num_sects = blk_rq_sectors(rq);
+	unsigned int num_blks;
 	unsigned int dif, dix;
-	bool zoned_write = sd_is_zoned(sdkp) && rq_data_dir(rq) == WRITE;
+	unsigned int sect_sz;
+	sector_t sect_addr = blk_rq_pos(rq);
+	sector_t sect_after = sect_addr + num_sects;
+	sector_t total_sects = get_capacity(disk);
+	sector_t threshold_sect;
+	sector_t lba;
+	bool is_write = (rq_data_dir(rq) == WRITE);
+	bool have_fua = !!(rq->cmd_flags & REQ_FUA);
+	bool zoned_write = sd_is_zoned(sdkp) && is_write;
 	int ret;
 	unsigned char protect;
 
@@ -1029,20 +1036,23 @@  static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 
 	SCSI_LOG_HLQUEUE(1,
 		scmd_printk(KERN_INFO, SCpnt,
-			"%s: block=%llu, count=%d\n",
-			__func__, (unsigned long long)block, this_count));
+			"%s: sector=%llu, num_sects=%d\n",
+			__func__, (unsigned long long)sect_addr, num_sects));
 
-	if (!sdp || !scsi_device_online(sdp) ||
-	    block + blk_rq_sectors(rq) > get_capacity(disk)) {
+	if (likely(sdp && scsi_device_online(sdp) &&
+		   (sect_after <= total_sects)))
+		; /* ok: have device, its online and access fits on medium */
+	else {
 		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
 						"Finishing %u sectors\n",
-						blk_rq_sectors(rq)));
+						num_sects));
 		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
 						"Retry with 0x%p\n", SCpnt));
 		goto out;
 	}
+	sect_sz = sdp->sector_size;
 
-	if (sdp->changed) {
+	if (unlikely(sdp->changed)) {
 		/*
 		 * quietly refuse to do anything to a changed disc until 
 		 * the changed bit has been reset
@@ -1055,21 +1065,22 @@  static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 	 * Some SD card readers can't handle multi-sector accesses which touch
 	 * the last one or two hardware sectors.  Split accesses as needed.
 	 */
-	threshold = get_capacity(disk) - SD_LAST_BUGGY_SECTORS *
-		(sdp->sector_size / 512);
-
-	if (unlikely(sdp->last_sector_bug && block + this_count > threshold)) {
-		if (block < threshold) {
-			/* Access up to the threshold but not beyond */
-			this_count = threshold - block;
-		} else {
-			/* Access only a single hardware sector */
-			this_count = sdp->sector_size / 512;
-		}
+	if (unlikely(sdp->last_sector_bug)) {
+		threshold_sect = total_sects -
+			    (SD_LAST_BUGGY_SECTORS * (sect_sz / 512));
+
+		if (unlikely(sect_after > threshold_sect))
+			num_sects = (sect_addr < threshold_sect) ?
+						(threshold_sect - sect_addr) :
+						(sect_sz / 512);
+			/* If LBA less than threshold then access up to the
+			 * threshold but not beyond; otherwise access only
+			 * a single hardware sector.
+			 */
 	}
 
-	SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n",
-					(unsigned long long)block));
+	SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "num_sects=%llu\n",
+					(unsigned long long)num_sects));
 
 	/*
 	 * If we have a 1K hardware sectorsize, prevent access to single
@@ -1080,66 +1091,48 @@  static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 	 * with the cdrom, since it is read-only.  For performance
 	 * reasons, the filesystems should be able to handle this
 	 * and not force the scsi disk driver to use bounce buffers
-	 * for this.
+	 * for this. Assume sect_sz >= 512 bytes.
 	 */
-	if (sdp->sector_size == 1024) {
-		if ((block & 1) || (blk_rq_sectors(rq) & 1)) {
+	if (sect_sz > 512) {
+		if ((sect_addr | num_sects) & (sect_sz / 512 - 1)) {
 			scmd_printk(KERN_ERR, SCpnt,
-				    "Bad block number requested\n");
+				    "Bad sector requested: address = %llu, num_sects = %u, sector size = %u bytes\n",
+				    sect_addr + 0ULL, num_sects, sect_sz);
 			goto out;
 		} else {
-			block = block >> 1;
-			this_count = this_count >> 1;
-		}
-	}
-	if (sdp->sector_size == 2048) {
-		if ((block & 3) || (blk_rq_sectors(rq) & 3)) {
-			scmd_printk(KERN_ERR, SCpnt,
-				    "Bad block number requested\n");
-			goto out;
-		} else {
-			block = block >> 2;
-			this_count = this_count >> 2;
-		}
-	}
-	if (sdp->sector_size == 4096) {
-		if ((block & 7) || (blk_rq_sectors(rq) & 7)) {
-			scmd_printk(KERN_ERR, SCpnt,
-				    "Bad block number requested\n");
-			goto out;
-		} else {
-			block = block >> 3;
-			this_count = this_count >> 3;
+			int shift = ilog2(sect_sz / 512);
+
+			lba = sect_addr >> shift;
+			num_blks = num_sects >> shift;
 		}
+	} else {	/* So sector size is 512 bytes */
+		lba = sect_addr;
+		num_blks = num_sects;
 	}
-	if (rq_data_dir(rq) == WRITE) {
-		SCpnt->cmnd[0] = WRITE_6;
 
+	if (is_write) {
 		if (blk_integrity_rq(rq))
 			sd_dif_prepare(SCpnt);
-
-	} else if (rq_data_dir(rq) == READ) {
-		SCpnt->cmnd[0] = READ_6;
-	} else {
+	} else if (unlikely(rq_data_dir(rq) != READ)) {
 		scmd_printk(KERN_ERR, SCpnt, "Unknown command %d\n", req_op(rq));
 		goto out;
 	}
 
 	SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
 					"%s %d/%u 512 byte blocks.\n",
-					(rq_data_dir(rq) == WRITE) ?
-					"writing" : "reading", this_count,
-					blk_rq_sectors(rq)));
+					is_write ?  "writing" : "reading",
+					num_blks, num_sects));
 
 	dix = scsi_prot_sg_count(SCpnt);
-	dif = scsi_host_dif_capable(SCpnt->device->host, sdkp->protection_type);
+	dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
 
-	if (dif || dix)
+	if (unlikely(dif || dix))
 		protect = sd_setup_protect_cmnd(SCpnt, dix, dif);
 	else
 		protect = 0;
 
-	if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) {
+	if (unlikely(protect &&
+		     sdkp->protection_type == T10_PI_TYPE2_PROTECTION)) {
 		SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC);
 
 		if (unlikely(SCpnt->cmnd == NULL)) {
@@ -1151,60 +1144,40 @@  static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 		memset(SCpnt->cmnd, 0, SCpnt->cmd_len);
 		SCpnt->cmnd[0] = VARIABLE_LENGTH_CMD;
 		SCpnt->cmnd[7] = 0x18;
-		SCpnt->cmnd[9] = (rq_data_dir(rq) == READ) ? READ_32 : WRITE_32;
-		SCpnt->cmnd[10] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0);
-
-		/* LBA */
-		SCpnt->cmnd[12] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0;
-		SCpnt->cmnd[13] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0;
-		SCpnt->cmnd[14] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0;
-		SCpnt->cmnd[15] = sizeof(block) > 4 ? (unsigned char) (block >> 32) & 0xff : 0;
-		SCpnt->cmnd[16] = (unsigned char) (block >> 24) & 0xff;
-		SCpnt->cmnd[17] = (unsigned char) (block >> 16) & 0xff;
-		SCpnt->cmnd[18] = (unsigned char) (block >> 8) & 0xff;
-		SCpnt->cmnd[19] = (unsigned char) block & 0xff;
-
-		/* Expected Indirect LBA */
-		SCpnt->cmnd[20] = (unsigned char) (block >> 24) & 0xff;
-		SCpnt->cmnd[21] = (unsigned char) (block >> 16) & 0xff;
-		SCpnt->cmnd[22] = (unsigned char) (block >> 8) & 0xff;
-		SCpnt->cmnd[23] = (unsigned char) block & 0xff;
-
-		/* Transfer length */
-		SCpnt->cmnd[28] = (unsigned char) (this_count >> 24) & 0xff;
-		SCpnt->cmnd[29] = (unsigned char) (this_count >> 16) & 0xff;
-		SCpnt->cmnd[30] = (unsigned char) (this_count >> 8) & 0xff;
-		SCpnt->cmnd[31] = (unsigned char) this_count & 0xff;
-	} else if (sdp->use_16_for_rw || (this_count > 0xffff)) {
-		SCpnt->cmnd[0] += READ_16 - READ_6;
-		SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0);
-		SCpnt->cmnd[2] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0;
-		SCpnt->cmnd[3] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0;
-		SCpnt->cmnd[4] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0;
-		SCpnt->cmnd[5] = sizeof(block) > 4 ? (unsigned char) (block >> 32) & 0xff : 0;
-		SCpnt->cmnd[6] = (unsigned char) (block >> 24) & 0xff;
-		SCpnt->cmnd[7] = (unsigned char) (block >> 16) & 0xff;
-		SCpnt->cmnd[8] = (unsigned char) (block >> 8) & 0xff;
-		SCpnt->cmnd[9] = (unsigned char) block & 0xff;
-		SCpnt->cmnd[10] = (unsigned char) (this_count >> 24) & 0xff;
-		SCpnt->cmnd[11] = (unsigned char) (this_count >> 16) & 0xff;
-		SCpnt->cmnd[12] = (unsigned char) (this_count >> 8) & 0xff;
-		SCpnt->cmnd[13] = (unsigned char) this_count & 0xff;
-		SCpnt->cmnd[14] = SCpnt->cmnd[15] = 0;
-	} else if ((this_count > 0xff) || (block > 0x1fffff) ||
-		   scsi_device_protection(SCpnt->device) ||
-		   SCpnt->device->use_10_for_rw) {
-		SCpnt->cmnd[0] += READ_10 - READ_6;
-		SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0);
-		SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff;
-		SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff;
-		SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff;
-		SCpnt->cmnd[5] = (unsigned char) block & 0xff;
-		SCpnt->cmnd[6] = SCpnt->cmnd[9] = 0;
-		SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff;
-		SCpnt->cmnd[8] = (unsigned char) this_count & 0xff;
+		SCpnt->cmnd[9] = is_write ? WRITE_32 : READ_32;
+		SCpnt->cmnd[10] = protect;
+		if (unlikely(have_fua))
+			SCpnt->cmnd[10] |= 0x8;
+		put_unaligned_be64(lba, SCpnt->cmnd + 12);
+
+		/* Expected initial LB reference tag: lower 4 bytes of LBA */
+		put_unaligned_be32(lba, SCpnt->cmnd + 20);
+		/* Leave Expected LB application tag and LB application tag
+		 * mask zeroed
+		 */
+
+		put_unaligned_be32(num_blks, SCpnt->cmnd + 28);
+	} else if (sdp->use_16_for_rw || unlikely(num_blks > 0xffff)) {
+		SCpnt->cmnd[0] = is_write ? WRITE_16 : READ_16;
+		SCpnt->cmnd[1] = protect;
+		if (unlikely(have_fua))
+			SCpnt->cmnd[1] |= 0x8;
+		put_unaligned_be64(lba, SCpnt->cmnd + 2);
+		put_unaligned_be32(num_blks, SCpnt->cmnd + 10);
+		SCpnt->cmnd[14] = 0;
+		SCpnt->cmnd[15] = 0;
+	} else if (sdp->use_10_for_rw || (num_blks > 0xff) ||
+		   (lba > 0x1fffff) || scsi_device_protection(sdp)) {
+		SCpnt->cmnd[0] = is_write ? WRITE_10 : READ_10;
+		SCpnt->cmnd[1] = protect;
+		if (unlikely(have_fua))
+			SCpnt->cmnd[1] |= 0x8;
+		put_unaligned_be32(lba, SCpnt->cmnd + 2);
+		SCpnt->cmnd[6] = 0;
+		put_unaligned_be16(num_blks, SCpnt->cmnd + 7);
+		SCpnt->cmnd[9] = 0;
 	} else {
-		if (unlikely(rq->cmd_flags & REQ_FUA)) {
+		if (unlikely(have_fua)) {
 			/*
 			 * This happens only if this drive failed
 			 * 10byte rw command with ILLEGAL_REQUEST
@@ -1215,22 +1188,21 @@  static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 				    "FUA write on READ/WRITE(6) drive\n");
 			goto out;
 		}
-
-		SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f);
-		SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff);
-		SCpnt->cmnd[3] = (unsigned char) block & 0xff;
-		SCpnt->cmnd[4] = (unsigned char) this_count;
+		/* rely on lba <= 0x1fffff so cmnd[1] will be zeroed */
+		put_unaligned_be32(lba, SCpnt->cmnd + 0);
+		SCpnt->cmnd[0] = is_write ? WRITE_6 : READ_6;
+		SCpnt->cmnd[4] = (unsigned char) num_blks;
 		SCpnt->cmnd[5] = 0;
 	}
-	SCpnt->sdb.length = this_count * sdp->sector_size;
+	SCpnt->sdb.length = num_blks * sect_sz;
 
 	/*
 	 * We shouldn't disconnect in the middle of a sector, so with a dumb
 	 * host adapter, it's safe to assume that we can at least transfer
 	 * this many bytes between each connect / disconnect.
 	 */
-	SCpnt->transfersize = sdp->sector_size;
-	SCpnt->underflow = this_count << 9;
+	SCpnt->transfersize = sect_sz;
+	SCpnt->underflow = num_blks << 9;
 	SCpnt->allowed = SD_MAX_RETRIES;
 
 	/*