Message ID | 20180404085418.29205-3-damien.lemoal@wdc.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
On Wed, 2018-04-04 at 17:54 +0900, Damien Le Moal wrote: > Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() > is called from sd_probe_async() and since sd_revalidate_disk() calls > sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called > concurrently with operations referencing a drive zone bitmaps and number ^^^^^^^^^^^^^^^^^^^^ Should "a" be changed into "the"? > [Damien] Updated commit message and changed nr_zones/bitmap swap order. Updating the number of zones after having updated the bitmap pointers is not sufficient to avoid trouble if the number of zones as reported by the drive changes while I/O is in progress. With the current implementation if the number of zones changes the seq_zones_bitmap is cleared. Can this cause trouble for the mq-deadline scheduler? Additionally, CPUs other than x86 can reorder store operations. Even worse, a CPU could cache the zone bitmap pointers which means that at least RCU protection + kfree_rcu() is needed to avoid trouble. I think we either should handle this case properly or issue a kernel warning. Thanks, Bart.
Bart, On 4/5/18 00:22, Bart Van Assche wrote: > On Wed, 2018-04-04 at 17:54 +0900, Damien Le Moal wrote: >> Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() >> is called from sd_probe_async() and since sd_revalidate_disk() calls >> sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called >> concurrently with operations referencing a drive zone bitmaps and number > ^^^^^^^^^^^^^^^^^^^^ > > Should "a" be changed into "the"? Yes. >> [Damien] Updated commit message and changed nr_zones/bitmap swap order. > > Updating the number of zones after having updated the bitmap pointers is not > sufficient to avoid trouble if the number of zones as reported by the drive > changes while I/O is in progress. With the current implementation if the > number of zones changes the seq_zones_bitmap is cleared. Can this cause > trouble for the mq-deadline scheduler? Additionally, CPUs other than x86 can > reorder store operations. Even worse, a CPU could cache the zone bitmap > pointers which means that at least RCU protection + kfree_rcu() is needed to > avoid trouble. I think we either should handle this case properly or issue a > kernel warning. OK. Let's work on that. -- Damien Le Moal, Western Digital
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index b59454ed5087..39ddbe92769c 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -551,14 +551,13 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, } /** - * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap. + * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap. * @sdkp: target disk * * Allocate a zone bitmap and initialize it by identifying sequential zones. */ -static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) +static unsigned long *sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) { - struct request_queue *q = sdkp->disk->queue; unsigned long *seq_zones_bitmap; sector_t lba = 0; unsigned char *buf; @@ -566,7 +565,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp); if (!seq_zones_bitmap) - return -ENOMEM; + return ERR_PTR(-ENOMEM); buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); if (!buf) @@ -589,12 +588,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) kfree(buf); if (ret) { kfree(seq_zones_bitmap); - return ret; + return ERR_PTR(ret); } - - q->seq_zones_bitmap = seq_zones_bitmap; - - return 0; + return seq_zones_bitmap; } static void sd_zbc_cleanup(struct scsi_disk *sdkp) @@ -630,24 +626,37 @@ static int sd_zbc_setup(struct scsi_disk *sdkp) * of zones changed. */ if (sdkp->nr_zones != q->nr_zones) { + struct request_queue *q = sdkp->disk->queue; + unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL; + size_t zone_bitmap_size; - sd_zbc_cleanup(sdkp); - - q->nr_zones = sdkp->nr_zones; if (sdkp->nr_zones) { - q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp); - if (!q->seq_zones_wlock) { + seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp); + if (!seq_zones_wlock) { ret = -ENOMEM; goto err; } - ret = sd_zbc_setup_seq_zones_bitmap(sdkp); - if (ret) { - sd_zbc_cleanup(sdkp); + seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp); + if (IS_ERR(seq_zones_bitmap)) { + ret = PTR_ERR(seq_zones_bitmap); + kfree(seq_zones_wlock); goto err; } } - + zone_bitmap_size = BITS_TO_LONGS(sdkp->nr_zones) * + sizeof(unsigned long); + if (q->nr_zones != sdkp->nr_zones) { + swap(q->seq_zones_wlock, seq_zones_wlock); + swap(q->seq_zones_bitmap, seq_zones_bitmap); + q->nr_zones = sdkp->nr_zones; + } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap, + zone_bitmap_size) != 0) { + memcpy(q->seq_zones_bitmap, seq_zones_bitmap, + zone_bitmap_size); + } + kfree(seq_zones_wlock); + kfree(seq_zones_bitmap); } return 0;