diff mbox series

[4/4] block: Generically handle report zones buffer

Message ID 20191024065006.8684-5-damien.lemoal@wdc.com (mailing list archive)
State New, archived
Headers show
Series zoned block device report zones enhancements | expand

Commit Message

Damien Le Moal Oct. 24, 2019, 6:50 a.m. UTC
Instead of relying on a zoned block device driver to allocate a buffer
for every execution of a report zones command execution, rely on the
block layer use of the device zone report queue limits to allocate a
buffer and keep it around when the device report_zones method is
executed in a loop, e.g.  as in blk_revalidate_disk_zones().

This simplifies the code in the scsi sd_zbc driver as well as simplify
the addition of zone supports for upcoming new zoned device drivers.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
---
 block/blk-zoned.c              | 99 ++++++++++++++++++++--------------
 drivers/block/null_blk.h       |  6 ++-
 drivers/block/null_blk_zoned.c |  3 +-
 drivers/md/dm.c                |  3 +-
 drivers/scsi/sd.h              |  3 +-
 drivers/scsi/sd_zbc.c          | 61 ++++++---------------
 include/linux/blkdev.h         |  8 +--
 7 files changed, 88 insertions(+), 95 deletions(-)

Comments

Christoph Hellwig Oct. 24, 2019, 7:13 a.m. UTC | #1
On Thu, Oct 24, 2019 at 03:50:06PM +0900, Damien Le Moal wrote:
> Instead of relying on a zoned block device driver to allocate a buffer
> for every execution of a report zones command execution, rely on the
> block layer use of the device zone report queue limits to allocate a
> buffer and keep it around when the device report_zones method is
> executed in a loop, e.g.  as in blk_revalidate_disk_zones().
> 
> This simplifies the code in the scsi sd_zbc driver as well as simplify
> the addition of zone supports for upcoming new zoned device drivers.

I wonder if we could just do away with the separate buffer entirely.
As the SCSI zone size (and also ATA even if we don't directly talk
to that) are intentionally the same size as the blk_zone (and the
same true is for the only upcoming standard I know of) we can just
rewrite each entry in-place(-ish) by reusing the same allocation.
Depending on the detailed formate we have to copy a field our two
onto the stack first, but it both avoids the extra allocation, and
the whole queue limits infrastructure in the previous patch and
should simply the code a lot.
diff mbox series

Patch

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 43bfd1be0985..6bddaa505df0 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -97,6 +97,29 @@  unsigned int blkdev_nr_zones(struct block_device *bdev)
 }
 EXPORT_SYMBOL_GPL(blkdev_nr_zones);
 
+/*
+ * Allocate a buffer to execute report zones.
+ */
+static void *blk_alloc_report_buffer(struct request_queue *q,
+				     unsigned int *nr_zones, size_t *buflen)
+{
+	unsigned int nrz = *nr_zones;
+	size_t bufsize = nrz * q->limits.zone_descriptor_size;
+	void *buf;
+
+	if (q->limits.zones_report_granularity)
+		bufsize = roundup(bufsize, q->limits.zones_report_granularity);
+	bufsize = min_t(size_t, bufsize, q->limits.max_zones_report_size);
+	buf = vzalloc(bufsize);
+	if (buf) {
+		*buflen = bufsize;
+		*nr_zones = min_t(unsigned int, nrz,
+				  bufsize / q->limits.zone_descriptor_size);
+	}
+
+	return buf;
+}
+
 /*
  * Check that a zone report belongs to this partition, and if yes, fix its start
  * sector and write pointer and return true. Return false otherwise.
@@ -140,7 +163,10 @@  int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 {
 	struct request_queue *q = bdev_get_queue(bdev);
 	struct gendisk *disk = bdev->bd_disk;
-	unsigned int i, nrz;
+	unsigned int i, nrz = *nr_zones;
+	sector_t capacity = bdev->bd_part->nr_sects;
+	size_t buflen = 0;
+	void *buf = NULL;
 	int ret;
 
 	if (!blk_queue_is_zoned(q))
@@ -154,27 +180,33 @@  int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 	if (WARN_ON_ONCE(!disk->fops->report_zones))
 		return -EOPNOTSUPP;
 
-	if (!*nr_zones || sector >= bdev->bd_part->nr_sects) {
+	if (!nrz || sector >= capacity) {
 		*nr_zones = 0;
 		return 0;
 	}
 
-	nrz = min(*nr_zones,
-		  __blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector));
-	ret = disk->fops->report_zones(disk, get_start_sect(bdev) + sector,
-				       zones, &nrz);
+	nrz = min(nrz, __blkdev_nr_zones(q, capacity - sector));
+	if (q->limits.zone_descriptor_size) {
+		buf = blk_alloc_report_buffer(q, &nrz, &buflen);
+		if (!buf)
+			return -ENOMEM;
+	}
+
+	ret = disk->fops->report_zones(disk, sector, zones, &nrz, buf, buflen);
 	if (ret)
-		return ret;
+		goto out;
 
 	for (i = 0; i < nrz; i++) {
 		if (!blkdev_report_zone(bdev, zones))
 			break;
 		zones++;
 	}
-
 	*nr_zones = i;
 
-	return 0;
+out:
+	kvfree(buf);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(blkdev_report_zones);
 
@@ -384,31 +416,6 @@  static inline unsigned long *blk_alloc_zone_bitmap(int node,
 			    GFP_NOIO, node);
 }
 
-/*
- * Allocate an array of struct blk_zone to get nr_zones zone information.
- * The allocated array may be smaller than nr_zones.
- */
-static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones)
-{
-	struct blk_zone *zones;
-	size_t nrz = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES);
-
-	/*
-	 * GFP_KERNEL here is meaningless as the caller task context has
-	 * the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones()
-	 * with memalloc_noio_save().
-	 */
-	zones = kvcalloc(nrz, sizeof(struct blk_zone), GFP_KERNEL);
-	if (!zones) {
-		*nr_zones = 0;
-		return NULL;
-	}
-
-	*nr_zones = nrz;
-
-	return zones;
-}
-
 void blk_queue_free_zone_bitmaps(struct request_queue *q)
 {
 	kfree(q->seq_zones_bitmap);
@@ -482,10 +489,12 @@  int blk_revalidate_disk_zones(struct gendisk *disk)
 	struct request_queue *q = disk->queue;
 	unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk));
 	unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
-	unsigned int i, rep_nr_zones = 0, z = 0, nrz;
+	unsigned int i, rep_nr_zones, z = 0, nrz;
 	struct blk_zone *zones = NULL;
 	unsigned int noio_flag;
 	sector_t sector = 0;
+	size_t buflen = 0;
+	void *buf = NULL;
 	int ret = 0;
 
 	/*
@@ -518,17 +527,28 @@  int blk_revalidate_disk_zones(struct gendisk *disk)
 		goto out;
 
 	/*
-	 * Get zone information to check the zones and initialize
-	 * seq_zones_bitmap.
+	 * Allocate a report buffer for the driver execution of report zones
+	 * and an array of zones to get the report back.
 	 */
 	rep_nr_zones = nr_zones;
-	zones = blk_alloc_zones(&rep_nr_zones);
+	if (q->limits.zone_descriptor_size) {
+		buf = blk_alloc_report_buffer(q, &rep_nr_zones, &buflen);
+		if (!buf)
+			goto out;
+	}
+
+	zones = kvcalloc(rep_nr_zones, sizeof(struct blk_zone), GFP_KERNEL);
 	if (!zones)
 		goto out;
 
+	/*
+	 * Get zone information to check the zones and initialize
+	 * seq_zones_bitmap.
+	 */
 	while (z < nr_zones) {
 		nrz = min(nr_zones - z, rep_nr_zones);
-		ret = disk->fops->report_zones(disk, sector, zones, &nrz);
+		ret = disk->fops->report_zones(disk, sector, zones, &nrz,
+					       buf, buflen);
 		if (ret)
 			goto out;
 		if (!nrz)
@@ -565,6 +585,7 @@  int blk_revalidate_disk_zones(struct gendisk *disk)
 	memalloc_noio_restore(noio_flag);
 
 	kvfree(zones);
+	kvfree(buf);
 	kfree(seq_zones_wlock);
 	kfree(seq_zones_bitmap);
 
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index 93c2a3d403da..6bd0482ec683 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -92,7 +92,8 @@  struct nullb {
 int null_zone_init(struct nullb_device *dev);
 void null_zone_exit(struct nullb_device *dev);
 int null_zone_report(struct gendisk *disk, sector_t sector,
-		     struct blk_zone *zones, unsigned int *nr_zones);
+		     struct blk_zone *zones, unsigned int *nr_zones,
+		     void *buf, size_t buflen);
 blk_status_t null_handle_zoned(struct nullb_cmd *cmd,
 				enum req_opf op, sector_t sector,
 				sector_t nr_sectors);
@@ -107,7 +108,8 @@  static inline int null_zone_init(struct nullb_device *dev)
 static inline void null_zone_exit(struct nullb_device *dev) {}
 static inline int null_zone_report(struct gendisk *disk, sector_t sector,
 				   struct blk_zone *zones,
-				   unsigned int *nr_zones)
+				   unsigned int *nr_zones,
+				   void *buf, size_t buflen)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index 4e56b17ed3ef..446e083be240 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -67,7 +67,8 @@  void null_zone_exit(struct nullb_device *dev)
 }
 
 int null_zone_report(struct gendisk *disk, sector_t sector,
-		     struct blk_zone *zones, unsigned int *nr_zones)
+		     struct blk_zone *zones, unsigned int *nr_zones,
+		     void *buf, size_t buflen)
 {
 	struct nullb *nullb = disk->private_data;
 	struct nullb_device *dev = nullb->dev;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 647aa5b0233b..5d5a297ceeb1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -441,7 +441,8 @@  static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 }
 
 static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
-			       struct blk_zone *zones, unsigned int *nr_zones)
+			       struct blk_zone *zones, unsigned int *nr_zones,
+			       void *buf, size_t buflen)
 {
 #ifdef CONFIG_BLK_DEV_ZONED
 	struct mapped_device *md = disk->private_data;
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 1eab779f812b..b948656b6882 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -213,7 +213,8 @@  extern blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd, bool all);
 extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
 			    struct scsi_sense_hdr *sshdr);
 extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
-			       struct blk_zone *zones, unsigned int *nr_zones);
+			       struct blk_zone *zones, unsigned int *nr_zones,
+			       void *buf, size_t buflen);
 
 #else /* CONFIG_BLK_DEV_ZONED */
 
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 8dc96f4ea920..228522c4338f 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -104,35 +104,6 @@  static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
 	return 0;
 }
 
-/**
- * Allocate a buffer for report zones reply.
- * @sdkp: The target disk
- * @nr_zones: Maximum number of zones to report
- * @buflen: Size of the buffer allocated
- *
- * Try to allocate a reply buffer for the number of requested zones.
- * The size of the buffer allocated may be smaller than requested to
- * satify the device constraint (max_hw_sectors, max_segments, etc).
- *
- * Return the address of the allocated buffer and update @buflen with
- * the size of the allocated buffer.
- */
-static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
-					unsigned int nr_zones, size_t *buflen)
-{
-	struct request_queue *q = sdkp->disk->queue;
-	size_t bufsize;
-	void *buf;
-
-	bufsize = min_t(size_t, roundup(nr_zones * 64, SECTOR_SIZE),
-			q->limits.max_zones_report_size);
-	buf = vzalloc(bufsize);
-	if (buf)
-		*buflen = bufsize;
-
-	return buf;
-}
-
 /**
  * sd_zbc_report_zones - Disk report zones operation.
  * @disk: The target disk
@@ -143,40 +114,40 @@  static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
  * Execute a report zones command on the target disk.
  */
 int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
-			struct blk_zone *zones, unsigned int *nr_zones)
+			struct blk_zone *zones, unsigned int *nr_zones,
+			void *buf, size_t buflen)
 {
 	struct scsi_disk *sdkp = scsi_disk(disk);
 	unsigned int i, nrz = *nr_zones;
-	unsigned char *buf;
-	size_t buflen = 0, offset = 0;
-	int ret = 0;
+	unsigned char *rep_buf = buf;
+	size_t offset = 0;
+	int ret;
 
 	if (!sd_is_zoned(sdkp))
 		/* Not a zoned device */
 		return -EOPNOTSUPP;
 
-	buf = sd_zbc_alloc_report_buffer(sdkp, nrz, &buflen);
-	if (!buf)
-		return -ENOMEM;
-
-	ret = sd_zbc_do_report_zones(sdkp, buf, buflen,
+	/*
+	 * The buffer prepared by the block layer may be too large for the
+	 * number of zones requested. Tune it here to avoid requesting too
+	 * many zones than necessary.
+	 */
+	buflen = min_t(size_t, roundup((nrz + 1) * 64, SECTOR_SIZE), buflen);
+	ret = sd_zbc_do_report_zones(sdkp, rep_buf, buflen,
 			sectors_to_logical(sdkp->device, sector), true);
 	if (ret)
-		goto out;
+		return ret;
 
-	nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64);
+	nrz = min(nrz, get_unaligned_be32(&rep_buf[0]) / 64);
 	for (i = 0; i < nrz; i++) {
 		offset += 64;
-		sd_zbc_parse_report(sdkp, buf + offset, zones);
+		sd_zbc_parse_report(sdkp, rep_buf + offset, zones);
 		zones++;
 	}
 
 	*nr_zones = nrz;
 
-out:
-	kvfree(buf);
-
-	return ret;
+	return 0;
 }
 
 /**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1c76d71fc232..f04927a7fb40 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -355,11 +355,6 @@  struct queue_limits {
 
 #ifdef CONFIG_BLK_DEV_ZONED
 
-/*
- * Maximum number of zones to report with a single report zones command.
- */
-#define BLK_ZONED_REPORT_MAX_ZONES	8192U
-
 extern unsigned int blkdev_nr_zones(struct block_device *bdev);
 extern int blkdev_report_zones(struct block_device *bdev,
 			       sector_t sector, struct blk_zone *zones,
@@ -1713,7 +1708,8 @@  struct block_device_operations {
 	/* this callback is with swap_lock and sometimes page table lock held */
 	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
 	int (*report_zones)(struct gendisk *, sector_t sector,
-			    struct blk_zone *zones, unsigned int *nr_zones);
+			    struct blk_zone *zones, unsigned int *nr_zones,
+			    void *buf, size_t buflen);
 	struct module *owner;
 	const struct pr_ops *pr_ops;
 };