diff mbox series

[1/2] block: add max_open_zones to blk-sysfs

Message ID 20200616102546.491961-2-niklas.cassel@wdc.com (mailing list archive)
State Superseded
Headers show
Series Export max open zones and max active zones to sysfs | expand

Commit Message

Niklas Cassel June 16, 2020, 10:25 a.m. UTC
Add a new max_open_zones definition in the sysfs documentation.
This definition will be common for all devices utilizing the zoned block
device support in the kernel.

Export max open zones according to this new definition for NVMe Zoned
Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
the kernel), and ZBC SCSI devices.

Add the new max_open_zones struct member to the request_queue, rather
than as a queue limit, since this property cannot be split across stacking
drivers.

Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
---
 Documentation/block/queue-sysfs.rst |  7 +++++++
 block/blk-sysfs.c                   | 15 +++++++++++++++
 drivers/nvme/host/zns.c             |  1 +
 drivers/scsi/sd_zbc.c               |  4 ++++
 include/linux/blkdev.h              | 20 ++++++++++++++++++++
 5 files changed, 47 insertions(+)

Comments

Javier González June 29, 2020, 7:41 p.m. UTC | #1
On 16.06.2020 12:25, Niklas Cassel wrote:
>Add a new max_open_zones definition in the sysfs documentation.
>This definition will be common for all devices utilizing the zoned block
>device support in the kernel.
>
>Export max open zones according to this new definition for NVMe Zoned
>Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
>the kernel), and ZBC SCSI devices.
>
>Add the new max_open_zones struct member to the request_queue, rather
>than as a queue limit, since this property cannot be split across stacking
>drivers.
>
>Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
>---
> Documentation/block/queue-sysfs.rst |  7 +++++++
> block/blk-sysfs.c                   | 15 +++++++++++++++
> drivers/nvme/host/zns.c             |  1 +
> drivers/scsi/sd_zbc.c               |  4 ++++
> include/linux/blkdev.h              | 20 ++++++++++++++++++++
> 5 files changed, 47 insertions(+)
>
>diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
>index 6a8513af9201..f01cf8530ae4 100644
>--- a/Documentation/block/queue-sysfs.rst
>+++ b/Documentation/block/queue-sysfs.rst
>@@ -117,6 +117,13 @@ Maximum number of elements in a DMA scatter/gather list with integrity
> data that will be submitted by the block layer core to the associated
> block driver.
>
>+max_open_zones (RO)
>+-------------------
>+For zoned block devices (zoned attribute indicating "host-managed" or
>+"host-aware"), the sum of zones belonging to any of the zone states:
>+EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
>+If this value is 0, there is no limit.
>+
> max_sectors_kb (RW)
> -------------------
> This is the maximum number of kilobytes that the block layer will allow
>diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>index 02643e149d5e..fa42961e9678 100644
>--- a/block/blk-sysfs.c
>+++ b/block/blk-sysfs.c
>@@ -305,6 +305,11 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
> 	return queue_var_show(blk_queue_nr_zones(q), page);
> }
>
>+static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
>+{
>+	return queue_var_show(queue_max_open_zones(q), page);
>+}
>+
> static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
> {
> 	return queue_var_show((blk_queue_nomerges(q) << 1) |
>@@ -667,6 +672,11 @@ static struct queue_sysfs_entry queue_nr_zones_entry = {
> 	.show = queue_nr_zones_show,
> };
>
>+static struct queue_sysfs_entry queue_max_open_zones_entry = {
>+	.attr = {.name = "max_open_zones", .mode = 0444 },
>+	.show = queue_max_open_zones_show,
>+};
>+
> static struct queue_sysfs_entry queue_nomerges_entry = {
> 	.attr = {.name = "nomerges", .mode = 0644 },
> 	.show = queue_nomerges_show,
>@@ -765,6 +775,7 @@ static struct attribute *queue_attrs[] = {
> 	&queue_nonrot_entry.attr,
> 	&queue_zoned_entry.attr,
> 	&queue_nr_zones_entry.attr,
>+	&queue_max_open_zones_entry.attr,
> 	&queue_nomerges_entry.attr,
> 	&queue_rq_affinity_entry.attr,
> 	&queue_iostats_entry.attr,
>@@ -792,6 +803,10 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
> 		(!q->mq_ops || !q->mq_ops->timeout))
> 			return 0;
>
>+	if (attr == &queue_max_open_zones_entry.attr &&
>+	    !blk_queue_is_zoned(q))
>+		return 0;
>+
> 	return attr->mode;
> }
>
>diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>index c08f6281b614..af156529f3b6 100644
>--- a/drivers/nvme/host/zns.c
>+++ b/drivers/nvme/host/zns.c
>@@ -82,6 +82,7 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>
> 	q->limits.zoned = BLK_ZONED_HM;
> 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>+	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
> free_data:
> 	kfree(id);
> 	return status;
>diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
>index 183a20720da9..aa3564139b40 100644
>--- a/drivers/scsi/sd_zbc.c
>+++ b/drivers/scsi/sd_zbc.c
>@@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
> 	/* The drive satisfies the kernel restrictions: set it up */
> 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
> 	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
>+	if (sdkp->zones_max_open == U32_MAX)
>+		blk_queue_max_open_zones(q, 0);
>+	else
>+		blk_queue_max_open_zones(q, sdkp->zones_max_open);
> 	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
>
> 	/* READ16/WRITE16 is mandatory for ZBC disks */
>diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>index 8fd900998b4e..2f332f00501d 100644
>--- a/include/linux/blkdev.h
>+++ b/include/linux/blkdev.h
>@@ -520,6 +520,7 @@ struct request_queue {
> 	unsigned int		nr_zones;
> 	unsigned long		*conv_zones_bitmap;
> 	unsigned long		*seq_zones_wlock;
>+	unsigned int		max_open_zones;
> #endif /* CONFIG_BLK_DEV_ZONED */
>
> 	/*
>@@ -729,6 +730,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
> 		return true;
> 	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
> }
>+
>+static inline void blk_queue_max_open_zones(struct request_queue *q,
>+		unsigned int max_open_zones)
>+{
>+	q->max_open_zones = max_open_zones;
>+}
>+
>+static inline unsigned int queue_max_open_zones(const struct request_queue *q)
>+{
>+	return q->max_open_zones;
>+}
> #else /* CONFIG_BLK_DEV_ZONED */
> static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
> {
>@@ -744,6 +756,14 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
> {
> 	return 0;
> }
>+static inline void blk_queue_max_open_zones(struct request_queue *q,
>+		unsigned int max_open_zones)
>+{
>+}
>+static inline unsigned int queue_max_open_zones(const struct request_queue *q)
>+{
>+	return 0;
>+}
> #endif /* CONFIG_BLK_DEV_ZONED */
>
> static inline bool rq_is_sync(struct request *rq)
>-- 
>2.26.2
>
>
>_______________________________________________
>linux-nvme mailing list
>linux-nvme@lists.infradead.org
>http://lists.infradead.org/mailman/listinfo/linux-nvme

Looks good to me.

Reviewed-by: Javier González <javier@javigon.com>
Damien Le Moal June 30, 2020, 1:49 a.m. UTC | #2
On 2020/06/16 19:28, Niklas Cassel wrote:
> Add a new max_open_zones definition in the sysfs documentation.
> This definition will be common for all devices utilizing the zoned block
> device support in the kernel.
> 
> Export max open zones according to this new definition for NVMe Zoned
> Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
> the kernel), and ZBC SCSI devices.
> 
> Add the new max_open_zones struct member to the request_queue, rather

Add the new max_open_zones member to struct request_queue...

> than as a queue limit, since this property cannot be split across stacking
> drivers.

But device-mapper target devices have a request queue too, and it looks like your
patch is not setting any value for them, leaving the default 0 for dm-linear and
dm-flakey. Attaching the new attribute directly to the request queue rather than
adding it as part of the queue limits seems odd. Even if the DM case is left
unsupported (using the default 0 = no limit), it may be cleaner to add the field
as part of the queue limits struct.

Adding the field as a device attribute rather than a queue limit, similarly to
the device maximum queue depth, would be another option. In that case, including
the field directly as part of the request queue makes more sense.

> 
> Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
> ---
>  Documentation/block/queue-sysfs.rst |  7 +++++++
>  block/blk-sysfs.c                   | 15 +++++++++++++++
>  drivers/nvme/host/zns.c             |  1 +
>  drivers/scsi/sd_zbc.c               |  4 ++++
>  include/linux/blkdev.h              | 20 ++++++++++++++++++++
>  5 files changed, 47 insertions(+)
> 
> diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
> index 6a8513af9201..f01cf8530ae4 100644
> --- a/Documentation/block/queue-sysfs.rst
> +++ b/Documentation/block/queue-sysfs.rst
> @@ -117,6 +117,13 @@ Maximum number of elements in a DMA scatter/gather list with integrity
>  data that will be submitted by the block layer core to the associated
>  block driver.
>  
> +max_open_zones (RO)
> +-------------------
> +For zoned block devices (zoned attribute indicating "host-managed" or
> +"host-aware"), the sum of zones belonging to any of the zone states:
> +EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
> +If this value is 0, there is no limit.
> +
>  max_sectors_kb (RW)
>  -------------------
>  This is the maximum number of kilobytes that the block layer will allow
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 02643e149d5e..fa42961e9678 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -305,6 +305,11 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
>  	return queue_var_show(blk_queue_nr_zones(q), page);
>  }
>  
> +static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
> +{
> +	return queue_var_show(queue_max_open_zones(q), page);
> +}
> +
>  static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
>  {
>  	return queue_var_show((blk_queue_nomerges(q) << 1) |
> @@ -667,6 +672,11 @@ static struct queue_sysfs_entry queue_nr_zones_entry = {
>  	.show = queue_nr_zones_show,
>  };
>  
> +static struct queue_sysfs_entry queue_max_open_zones_entry = {
> +	.attr = {.name = "max_open_zones", .mode = 0444 },
> +	.show = queue_max_open_zones_show,
> +};
> +
>  static struct queue_sysfs_entry queue_nomerges_entry = {
>  	.attr = {.name = "nomerges", .mode = 0644 },
>  	.show = queue_nomerges_show,
> @@ -765,6 +775,7 @@ static struct attribute *queue_attrs[] = {
>  	&queue_nonrot_entry.attr,
>  	&queue_zoned_entry.attr,
>  	&queue_nr_zones_entry.attr,
> +	&queue_max_open_zones_entry.attr,
>  	&queue_nomerges_entry.attr,
>  	&queue_rq_affinity_entry.attr,
>  	&queue_iostats_entry.attr,
> @@ -792,6 +803,10 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
>  		(!q->mq_ops || !q->mq_ops->timeout))
>  			return 0;
>  
> +	if (attr == &queue_max_open_zones_entry.attr &&
> +	    !blk_queue_is_zoned(q))
> +		return 0;
> +
>  	return attr->mode;
>  }
>  
> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
> index c08f6281b614..af156529f3b6 100644
> --- a/drivers/nvme/host/zns.c
> +++ b/drivers/nvme/host/zns.c
> @@ -82,6 +82,7 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>  
>  	q->limits.zoned = BLK_ZONED_HM;
>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
> +	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
>  free_data:
>  	kfree(id);
>  	return status;
> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
> index 183a20720da9..aa3564139b40 100644
> --- a/drivers/scsi/sd_zbc.c
> +++ b/drivers/scsi/sd_zbc.c
> @@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
>  	/* The drive satisfies the kernel restrictions: set it up */
>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>  	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
> +	if (sdkp->zones_max_open == U32_MAX)
> +		blk_queue_max_open_zones(q, 0);
> +	else
> +		blk_queue_max_open_zones(q, sdkp->zones_max_open);

This is correct only for host-managed drives. Host-aware models define the
"OPTIMAL NUMBER OF OPEN SEQUENTIAL WRITE PREFERRED ZONES" instead of a maximum
number of open sequential write required zones.

Since the standard does not explicitly define what the maximum number of open
sequential write required zones should be for a host-aware drive, I would
suggest always setting the max_open_zones value to 0 for host-aware disks.

>  	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
>  
>  	/* READ16/WRITE16 is mandatory for ZBC disks */
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 8fd900998b4e..2f332f00501d 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -520,6 +520,7 @@ struct request_queue {
>  	unsigned int		nr_zones;
>  	unsigned long		*conv_zones_bitmap;
>  	unsigned long		*seq_zones_wlock;
> +	unsigned int		max_open_zones;
>  #endif /* CONFIG_BLK_DEV_ZONED */
>  
>  	/*
> @@ -729,6 +730,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
>  		return true;
>  	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
>  }
> +
> +static inline void blk_queue_max_open_zones(struct request_queue *q,
> +		unsigned int max_open_zones)
> +{
> +	q->max_open_zones = max_open_zones;
> +}
> +
> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
> +{
> +	return q->max_open_zones;
> +}
>  #else /* CONFIG_BLK_DEV_ZONED */
>  static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
>  {
> @@ -744,6 +756,14 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
>  {
>  	return 0;
>  }
> +static inline void blk_queue_max_open_zones(struct request_queue *q,
> +		unsigned int max_open_zones)
> +{
> +}

Why is this one necessary? For the !CONFIG_BLK_DEV_ZONED case, no driver should
ever call this function.

> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
> +{
> +	return 0;
> +}
>  #endif /* CONFIG_BLK_DEV_ZONED */
>  
>  static inline bool rq_is_sync(struct request *rq)
>
Damien Le Moal June 30, 2020, 2:17 a.m. UTC | #3
On 2020/06/30 10:49, Damien Le Moal wrote:
> On 2020/06/16 19:28, Niklas Cassel wrote:
>> Add a new max_open_zones definition in the sysfs documentation.
>> This definition will be common for all devices utilizing the zoned block
>> device support in the kernel.
>>
>> Export max open zones according to this new definition for NVMe Zoned
>> Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
>> the kernel), and ZBC SCSI devices.
>>
>> Add the new max_open_zones struct member to the request_queue, rather
> 
> Add the new max_open_zones member to struct request_queue...
> 
>> than as a queue limit, since this property cannot be split across stacking
>> drivers.
> 
> But device-mapper target device have a request queue too and it looks like your
> patch is not setting any value, using the default 0 for dm-linear and dm-flakey.
> Attaching the new attribute directly to the request queue rather than adding it
> as part of the queue limits seems odd. Even if DM case is left unsupported
> (using the default 0 = no limit), it may be cleaner to add the field as part of
> the limit struct.
> 
> Adding the field as a device attribute rather than a queue limit, similarly to
> the device maximum queue depth would be another option. In such case, including
> the field directly as part of the request queue makes more sense.

Thinking more about this one: struct request_queue has the nr_zones field, which
is not a queue limit but is still exported as a queue attribute. Device mapper
targets exposing a zoned drive do set this field manually. So I guess the same
approach is valid for max_open_zones (and max_active_zones). So OK, disregard
this comment.

The other comments I sent below remain though.

> 
>>
>> Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
>> ---
>>  Documentation/block/queue-sysfs.rst |  7 +++++++
>>  block/blk-sysfs.c                   | 15 +++++++++++++++
>>  drivers/nvme/host/zns.c             |  1 +
>>  drivers/scsi/sd_zbc.c               |  4 ++++
>>  include/linux/blkdev.h              | 20 ++++++++++++++++++++
>>  5 files changed, 47 insertions(+)
>>
>> diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
>> index 6a8513af9201..f01cf8530ae4 100644
>> --- a/Documentation/block/queue-sysfs.rst
>> +++ b/Documentation/block/queue-sysfs.rst
>> @@ -117,6 +117,13 @@ Maximum number of elements in a DMA scatter/gather list with integrity
>>  data that will be submitted by the block layer core to the associated
>>  block driver.
>>  
>> +max_open_zones (RO)
>> +-------------------
>> +For zoned block devices (zoned attribute indicating "host-managed" or
>> +"host-aware"), the sum of zones belonging to any of the zone states:
>> +EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
>> +If this value is 0, there is no limit.
>> +
>>  max_sectors_kb (RW)
>>  -------------------
>>  This is the maximum number of kilobytes that the block layer will allow
>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>> index 02643e149d5e..fa42961e9678 100644
>> --- a/block/blk-sysfs.c
>> +++ b/block/blk-sysfs.c
>> @@ -305,6 +305,11 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
>>  	return queue_var_show(blk_queue_nr_zones(q), page);
>>  }
>>  
>> +static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
>> +{
>> +	return queue_var_show(queue_max_open_zones(q), page);
>> +}
>> +
>>  static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
>>  {
>>  	return queue_var_show((blk_queue_nomerges(q) << 1) |
>> @@ -667,6 +672,11 @@ static struct queue_sysfs_entry queue_nr_zones_entry = {
>>  	.show = queue_nr_zones_show,
>>  };
>>  
>> +static struct queue_sysfs_entry queue_max_open_zones_entry = {
>> +	.attr = {.name = "max_open_zones", .mode = 0444 },
>> +	.show = queue_max_open_zones_show,
>> +};
>> +
>>  static struct queue_sysfs_entry queue_nomerges_entry = {
>>  	.attr = {.name = "nomerges", .mode = 0644 },
>>  	.show = queue_nomerges_show,
>> @@ -765,6 +775,7 @@ static struct attribute *queue_attrs[] = {
>>  	&queue_nonrot_entry.attr,
>>  	&queue_zoned_entry.attr,
>>  	&queue_nr_zones_entry.attr,
>> +	&queue_max_open_zones_entry.attr,
>>  	&queue_nomerges_entry.attr,
>>  	&queue_rq_affinity_entry.attr,
>>  	&queue_iostats_entry.attr,
>> @@ -792,6 +803,10 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
>>  		(!q->mq_ops || !q->mq_ops->timeout))
>>  			return 0;
>>  
>> +	if (attr == &queue_max_open_zones_entry.attr &&
>> +	    !blk_queue_is_zoned(q))
>> +		return 0;
>> +
>>  	return attr->mode;
>>  }
>>  
>> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>> index c08f6281b614..af156529f3b6 100644
>> --- a/drivers/nvme/host/zns.c
>> +++ b/drivers/nvme/host/zns.c
>> @@ -82,6 +82,7 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>  
>>  	q->limits.zoned = BLK_ZONED_HM;
>>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>> +	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
>>  free_data:
>>  	kfree(id);
>>  	return status;
>> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
>> index 183a20720da9..aa3564139b40 100644
>> --- a/drivers/scsi/sd_zbc.c
>> +++ b/drivers/scsi/sd_zbc.c
>> @@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
>>  	/* The drive satisfies the kernel restrictions: set it up */
>>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>>  	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
>> +	if (sdkp->zones_max_open == U32_MAX)
>> +		blk_queue_max_open_zones(q, 0);
>> +	else
>> +		blk_queue_max_open_zones(q, sdkp->zones_max_open);
> 
> This is correct only for host-managed drives. Host-aware models define the
> "OPTIMAL NUMBER OF OPEN SEQUENTIAL WRITE PREFERRED ZONES" instead of a maximum
> number of open sequential write required zones.
> 
> Since the standard does not actually explicitly define what the value of the
> maximum number of open sequential write required zones should be for a
> host-aware drive, I would suggest to always have the max_open_zones value set to
> 0 for host-aware disks.
> 
>>  	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
>>  
>>  	/* READ16/WRITE16 is mandatory for ZBC disks */
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 8fd900998b4e..2f332f00501d 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -520,6 +520,7 @@ struct request_queue {
>>  	unsigned int		nr_zones;
>>  	unsigned long		*conv_zones_bitmap;
>>  	unsigned long		*seq_zones_wlock;
>> +	unsigned int		max_open_zones;
>>  #endif /* CONFIG_BLK_DEV_ZONED */
>>  
>>  	/*
>> @@ -729,6 +730,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
>>  		return true;
>>  	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
>>  }
>> +
>> +static inline void blk_queue_max_open_zones(struct request_queue *q,
>> +		unsigned int max_open_zones)
>> +{
>> +	q->max_open_zones = max_open_zones;
>> +}
>> +
>> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
>> +{
>> +	return q->max_open_zones;
>> +}
>>  #else /* CONFIG_BLK_DEV_ZONED */
>>  static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
>>  {
>> @@ -744,6 +756,14 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
>>  {
>>  	return 0;
>>  }
>> +static inline void blk_queue_max_open_zones(struct request_queue *q,
>> +		unsigned int max_open_zones)
>> +{
>> +}
> 
> Why is this one necessary ? For the !CONFIG_BLK_DEV_ZONED case, no driver should
> ever call this function.
> 
>> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
>> +{
>> +	return 0;
>> +}
>>  #endif /* CONFIG_BLK_DEV_ZONED */
>>  
>>  static inline bool rq_is_sync(struct request *rq)
>>
> 
>
Niklas Cassel July 2, 2020, 12:37 p.m. UTC | #4
On Tue, Jun 30, 2020 at 01:49:41AM +0000, Damien Le Moal wrote:
> On 2020/06/16 19:28, Niklas Cassel wrote:
> > diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
> > index c08f6281b614..af156529f3b6 100644
> > --- a/drivers/nvme/host/zns.c
> > +++ b/drivers/nvme/host/zns.c
> > @@ -82,6 +82,7 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
> >  
> >  	q->limits.zoned = BLK_ZONED_HM;
> >  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
> > +	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
> >  free_data:
> >  	kfree(id);
> >  	return status;
> > diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
> > index 183a20720da9..aa3564139b40 100644
> > --- a/drivers/scsi/sd_zbc.c
> > +++ b/drivers/scsi/sd_zbc.c
> > @@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
> >  	/* The drive satisfies the kernel restrictions: set it up */
> >  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
> >  	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
> > +	if (sdkp->zones_max_open == U32_MAX)
> > +		blk_queue_max_open_zones(q, 0);
> > +	else
> > +		blk_queue_max_open_zones(q, sdkp->zones_max_open);
> 
> This is correct only for host-managed drives. Host-aware models define the
> "OPTIMAL NUMBER OF OPEN SEQUENTIAL WRITE PREFERRED ZONES" instead of a maximum
> number of open sequential write required zones.
> 
> Since the standard does not actually explicitly define what the value of the
> maximum number of open sequential write required zones should be for a
> host-aware drive, I would suggest to always have the max_open_zones value set to
> 0 for host-aware disks.

Isn't this already the case?

At least according to the comments:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/scsi/sd_zbc.c?h=v5.8-rc3#n555

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/scsi/sd_zbc.c?h=v5.8-rc3#n561

We seem to set

sdkp->zones_max_open = 0;

for host-aware, and

sdkp->zones_max_open = get_unaligned_be32(&buf[16]);

for host-managed.

So the blk_queue_max_open_zones(q, sdkp->zones_max_open) call in
sd_zbc_read_zones() should already export this new sysfs property
as 0 for host-aware disks.


Kind regards,
Niklas

> 
> >  	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
> >  
> >  	/* READ16/WRITE16 is mandatory for ZBC disks */
> > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > index 8fd900998b4e..2f332f00501d 100644
> > --- a/include/linux/blkdev.h
> > +++ b/include/linux/blkdev.h
> > @@ -520,6 +520,7 @@ struct request_queue {
> >  	unsigned int		nr_zones;
> >  	unsigned long		*conv_zones_bitmap;
> >  	unsigned long		*seq_zones_wlock;
> > +	unsigned int		max_open_zones;
> >  #endif /* CONFIG_BLK_DEV_ZONED */
> >  
> >  	/*
> > @@ -729,6 +730,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
> >  		return true;
> >  	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
> >  }
> > +
> > +static inline void blk_queue_max_open_zones(struct request_queue *q,
> > +		unsigned int max_open_zones)
> > +{
> > +	q->max_open_zones = max_open_zones;
> > +}
> > +
> > +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
> > +{
> > +	return q->max_open_zones;
> > +}
> >  #else /* CONFIG_BLK_DEV_ZONED */
> >  static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
> >  {
> > @@ -744,6 +756,14 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
> >  {
> >  	return 0;
> >  }
> > +static inline void blk_queue_max_open_zones(struct request_queue *q,
> > +		unsigned int max_open_zones)
> > +{
> > +}
> 
> Why is this one necessary ? For the !CONFIG_BLK_DEV_ZONED case, no driver should
> ever call this function.

Will remove in v2.
Damien Le Moal July 3, 2020, 4:56 a.m. UTC | #5
On 2020/07/02 21:37, Niklas Cassel wrote:
> On Tue, Jun 30, 2020 at 01:49:41AM +0000, Damien Le Moal wrote:
>> On 2020/06/16 19:28, Niklas Cassel wrote:
>>> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>>> index c08f6281b614..af156529f3b6 100644
>>> --- a/drivers/nvme/host/zns.c
>>> +++ b/drivers/nvme/host/zns.c
>>> @@ -82,6 +82,7 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>>  
>>>  	q->limits.zoned = BLK_ZONED_HM;
>>>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>>> +	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
>>>  free_data:
>>>  	kfree(id);
>>>  	return status;
>>> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
>>> index 183a20720da9..aa3564139b40 100644
>>> --- a/drivers/scsi/sd_zbc.c
>>> +++ b/drivers/scsi/sd_zbc.c
>>> @@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
>>>  	/* The drive satisfies the kernel restrictions: set it up */
>>>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>>>  	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
>>> +	if (sdkp->zones_max_open == U32_MAX)
>>> +		blk_queue_max_open_zones(q, 0);
>>> +	else
>>> +		blk_queue_max_open_zones(q, sdkp->zones_max_open);
>>
>> This is correct only for host-managed drives. Host-aware models define the
>> "OPTIMAL NUMBER OF OPEN SEQUENTIAL WRITE PREFERRED ZONES" instead of a maximum
>> number of open sequential write required zones.
>>
>> Since the standard does not actually explicitly define what the value of the
>> maximum number of open sequential write required zones should be for a
>> host-aware drive, I would suggest to always have the max_open_zones value set to
>> 0 for host-aware disks.
> 
> Isn't this already the case?
> 
> At least according to the comments:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/scsi/sd_zbc.c?h=v5.8-rc3#n555
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/scsi/sd_zbc.c?h=v5.8-rc3#n561
> 
> We seem to set
> 
> sdkp->zones_max_open = 0;
> 
> for host-aware, and
> 
> sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
> 
> for host-managed.
> 
> So the blk_queue_max_open_zones(q, sdkp->zones_max_open) call in
> sd_zbc_read_zones() should already export this new sysfs property
> as 0 for host-aware disks.

Oh, yes! You are absolutely right. I forgot about that code :)
Please disregard this comment.

> 
> 
> Kind regards,
> Niklas
> 
>>
>>>  	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
>>>  
>>>  	/* READ16/WRITE16 is mandatory for ZBC disks */
>>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>> index 8fd900998b4e..2f332f00501d 100644
>>> --- a/include/linux/blkdev.h
>>> +++ b/include/linux/blkdev.h
>>> @@ -520,6 +520,7 @@ struct request_queue {
>>>  	unsigned int		nr_zones;
>>>  	unsigned long		*conv_zones_bitmap;
>>>  	unsigned long		*seq_zones_wlock;
>>> +	unsigned int		max_open_zones;
>>>  #endif /* CONFIG_BLK_DEV_ZONED */
>>>  
>>>  	/*
>>> @@ -729,6 +730,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
>>>  		return true;
>>>  	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
>>>  }
>>> +
>>> +static inline void blk_queue_max_open_zones(struct request_queue *q,
>>> +		unsigned int max_open_zones)
>>> +{
>>> +	q->max_open_zones = max_open_zones;
>>> +}
>>> +
>>> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
>>> +{
>>> +	return q->max_open_zones;
>>> +}
>>>  #else /* CONFIG_BLK_DEV_ZONED */
>>>  static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
>>>  {
>>> @@ -744,6 +756,14 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
>>>  {
>>>  	return 0;
>>>  }
>>> +static inline void blk_queue_max_open_zones(struct request_queue *q,
>>> +		unsigned int max_open_zones)
>>> +{
>>> +}
>>
>> Why is this one necessary ? For the !CONFIG_BLK_DEV_ZONED case, no driver should
>> ever call this function.
> 
> Will remove in v2.
>
diff mbox series

Patch

diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
index 6a8513af9201..f01cf8530ae4 100644
--- a/Documentation/block/queue-sysfs.rst
+++ b/Documentation/block/queue-sysfs.rst
@@ -117,6 +117,13 @@  Maximum number of elements in a DMA scatter/gather list with integrity
 data that will be submitted by the block layer core to the associated
 block driver.
 
+max_open_zones (RO)
+-------------------
+For zoned block devices (zoned attribute indicating "host-managed" or
+"host-aware"), the sum of zones belonging to any of the zone states:
+EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
+If this value is 0, there is no limit.
+
 max_sectors_kb (RW)
 -------------------
 This is the maximum number of kilobytes that the block layer will allow
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 02643e149d5e..fa42961e9678 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -305,6 +305,11 @@  static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
 	return queue_var_show(blk_queue_nr_zones(q), page);
 }
 
+static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_max_open_zones(q), page);
+}
+
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
 {
 	return queue_var_show((blk_queue_nomerges(q) << 1) |
@@ -667,6 +672,11 @@  static struct queue_sysfs_entry queue_nr_zones_entry = {
 	.show = queue_nr_zones_show,
 };
 
+static struct queue_sysfs_entry queue_max_open_zones_entry = {
+	.attr = {.name = "max_open_zones", .mode = 0444 },
+	.show = queue_max_open_zones_show,
+};
+
 static struct queue_sysfs_entry queue_nomerges_entry = {
 	.attr = {.name = "nomerges", .mode = 0644 },
 	.show = queue_nomerges_show,
@@ -765,6 +775,7 @@  static struct attribute *queue_attrs[] = {
 	&queue_nonrot_entry.attr,
 	&queue_zoned_entry.attr,
 	&queue_nr_zones_entry.attr,
+	&queue_max_open_zones_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
@@ -792,6 +803,10 @@  static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
 		(!q->mq_ops || !q->mq_ops->timeout))
 			return 0;
 
+	if (attr == &queue_max_open_zones_entry.attr &&
+	    !blk_queue_is_zoned(q))
+		return 0;
+
 	return attr->mode;
 }
 
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index c08f6281b614..af156529f3b6 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -82,6 +82,7 @@  int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
 
 	q->limits.zoned = BLK_ZONED_HM;
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
+	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
 free_data:
 	kfree(id);
 	return status;
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 183a20720da9..aa3564139b40 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -717,6 +717,10 @@  int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 	/* The drive satisfies the kernel restrictions: set it up */
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
 	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
+	if (sdkp->zones_max_open == U32_MAX)
+		blk_queue_max_open_zones(q, 0);
+	else
+		blk_queue_max_open_zones(q, sdkp->zones_max_open);
 	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
 
 	/* READ16/WRITE16 is mandatory for ZBC disks */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8fd900998b4e..2f332f00501d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -520,6 +520,7 @@  struct request_queue {
 	unsigned int		nr_zones;
 	unsigned long		*conv_zones_bitmap;
 	unsigned long		*seq_zones_wlock;
+	unsigned int		max_open_zones;
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 	/*
@@ -729,6 +730,17 @@  static inline bool blk_queue_zone_is_seq(struct request_queue *q,
 		return true;
 	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
 }
+
+static inline void blk_queue_max_open_zones(struct request_queue *q,
+		unsigned int max_open_zones)
+{
+	q->max_open_zones = max_open_zones;
+}
+
+static inline unsigned int queue_max_open_zones(const struct request_queue *q)
+{
+	return q->max_open_zones;
+}
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
 {
@@ -744,6 +756,14 @@  static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 {
 	return 0;
 }
+static inline void blk_queue_max_open_zones(struct request_queue *q,
+		unsigned int max_open_zones)
+{
+}
+static inline unsigned int queue_max_open_zones(const struct request_queue *q)
+{
+	return 0;
+}
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 static inline bool rq_is_sync(struct request *rq)