Message ID: 20200616102546.491961-2-niklas.cassel@wdc.com (mailing list archive)
State: Superseded
Series: Export max open zones and max active zones to sysfs
On 16.06.2020 12:25, Niklas Cassel wrote:
> Add a new max_open_zones definition in the sysfs documentation.
> This definition will be common for all devices utilizing the zoned block
> device support in the kernel.
>
> Export max open zones according to this new definition for NVMe Zoned
> Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
> the kernel), and ZBC SCSI devices.
>
> Add the new max_open_zones struct member to the request_queue, rather
> than as a queue limit, since this property cannot be split across stacking
> drivers.
>
> Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
>
> [full diff quoted; see the patch below]

Looks good to me.

Reviewed-by: Javier González <javier@javigon.com>
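Once the attribute is exposed, it can be read from userspace like any other queue attribute. A minimal sketch, assuming a zoned device named nvme0n1 (the device path is an example only, not part of the patch; on non-zoned devices the file is not created at all, per the queue_attr_visible() change above):

	#include <stdio.h>

	int main(void)
	{
		/* Path is an example; substitute your zoned block device. */
		FILE *f = fopen("/sys/block/nvme0n1/queue/max_open_zones", "r");
		unsigned int max_open_zones;

		if (!f) {
			perror("fopen");
			return 1;
		}
		if (fscanf(f, "%u", &max_open_zones) != 1) {
			fclose(f);
			return 1;
		}
		fclose(f);
		/* 0 means the device reports no open-zone limit. */
		printf("max_open_zones: %u\n", max_open_zones);
		return 0;
	}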
On 2020/06/16 19:28, Niklas Cassel wrote:
> Add a new max_open_zones definition in the sysfs documentation.
> This definition will be common for all devices utilizing the zoned block
> device support in the kernel.
>
> Export max open zones according to this new definition for NVMe Zoned
> Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
> the kernel), and ZBC SCSI devices.
>
> Add the new max_open_zones struct member to the request_queue, rather

Add the new max_open_zones member to struct request_queue...

> than as a queue limit, since this property cannot be split across stacking
> drivers.

But device-mapper target devices have a request queue too, and it looks like
your patch is not setting any value for them, using the default 0 for
dm-linear and dm-flakey. Attaching the new attribute directly to the request
queue rather than adding it as part of the queue limits seems odd. Even if
the DM case is left unsupported (using the default 0 = no limit), it may be
cleaner to add the field as part of the limits struct.

Adding the field as a device attribute rather than a queue limit, similarly
to the device maximum queue depth, would be another option. In that case,
including the field directly as part of the request queue makes more sense.

[documentation, blk-sysfs.c, and zns.c hunks trimmed]

> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
> index 183a20720da9..aa3564139b40 100644
> --- a/drivers/scsi/sd_zbc.c
> +++ b/drivers/scsi/sd_zbc.c
> @@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
>  	/* The drive satisfies the kernel restrictions: set it up */
>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>  	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
> +	if (sdkp->zones_max_open == U32_MAX)
> +		blk_queue_max_open_zones(q, 0);
> +	else
> +		blk_queue_max_open_zones(q, sdkp->zones_max_open);

This is correct only for host-managed drives. Host-aware models define the
"OPTIMAL NUMBER OF OPEN SEQUENTIAL WRITE PREFERRED ZONES" instead of a
maximum number of open sequential write required zones.

Since the standard does not explicitly define what the value of the maximum
number of open sequential write required zones should be for a host-aware
drive, I would suggest always setting the max_open_zones value to 0 for
host-aware disks.

[blkdev.h hunk trimmed down to the !CONFIG_BLK_DEV_ZONED stub]

> +static inline void blk_queue_max_open_zones(struct request_queue *q,
> +		unsigned int max_open_zones)
> +{
> +}

Why is this one necessary? For the !CONFIG_BLK_DEV_ZONED case, no driver
should ever call this function.
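To illustrate the queue-limit alternative floated above: if the field lived in struct queue_limits, stacking code could combine a bottom device's value into the top device's the way other limits are combined. A hypothetical one-line sketch, not part of the patch (min_not_zero() is the existing kernel macro; t and b stand for the top and bottom limits):

	/* Hypothetical: combine limits across stacked devices,
	 * treating 0 as "no limit", as other queue limits do. */
	t->max_open_zones = min_not_zero(t->max_open_zones,
					 b->max_open_zones);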
On 2020/06/30 10:49, Damien Le Moal wrote:
> On 2020/06/16 19:28, Niklas Cassel wrote:
>> Add the new max_open_zones struct member to the request_queue, rather
>
> Add the new max_open_zones member to struct request_queue...
>
>> than as a queue limit, since this property cannot be split across stacking
>> drivers.
>
> But device-mapper target devices have a request queue too, and it looks
> like your patch is not setting any value for them, using the default 0 for
> dm-linear and dm-flakey. Attaching the new attribute directly to the
> request queue rather than adding it as part of the queue limits seems odd.
> Even if the DM case is left unsupported (using the default 0 = no limit),
> it may be cleaner to add the field as part of the limits struct.
>
> Adding the field as a device attribute rather than a queue limit, similarly
> to the device maximum queue depth, would be another option. In that case,
> including the field directly as part of the request queue makes more sense.

Thinking more about this one: struct request_queue has the nr_zones field,
which is not a queue limit but is still exported as a queue attribute, and
device-mapper targets exposing a zoned drive set this field manually. So I
guess the same approach is valid for max_open_zones (and max_active_zones).

So OK, disregard this comment. The other comments I sent below remain though.

[rest of quoted patch and comments trimmed]
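Following the nr_zones precedent mentioned above, a device-mapper target could mirror the underlying device's limit by hand. A minimal hypothetical sketch using only the helpers this patch adds (the function name is invented for illustration):

	/* Hypothetical helper: copy the bottom device's open-zone limit
	 * onto the DM target's queue, as is already done for nr_zones. */
	static void dm_mirror_max_open_zones(struct request_queue *target_q,
					     struct request_queue *bottom_q)
	{
		blk_queue_max_open_zones(target_q,
					 queue_max_open_zones(bottom_q));
	}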
On Tue, Jun 30, 2020 at 01:49:41AM +0000, Damien Le Moal wrote:
> On 2020/06/16 19:28, Niklas Cassel wrote:
> > diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
> > index 183a20720da9..aa3564139b40 100644
> > --- a/drivers/scsi/sd_zbc.c
> > +++ b/drivers/scsi/sd_zbc.c
> > @@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
> >  	/* The drive satisfies the kernel restrictions: set it up */
> >  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
> >  	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
> > +	if (sdkp->zones_max_open == U32_MAX)
> > +		blk_queue_max_open_zones(q, 0);
> > +	else
> > +		blk_queue_max_open_zones(q, sdkp->zones_max_open);
>
> This is correct only for host-managed drives. Host-aware models define the
> "OPTIMAL NUMBER OF OPEN SEQUENTIAL WRITE PREFERRED ZONES" instead of a
> maximum number of open sequential write required zones.
>
> Since the standard does not actually explicitly define what the value of
> the maximum number of open sequential write required zones should be for a
> host-aware drive, I would suggest to always have the max_open_zones value
> set to 0 for host-aware disks.

Isn't this already the case?

At least according to the comments:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/scsi/sd_zbc.c?h=v5.8-rc3#n555
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/scsi/sd_zbc.c?h=v5.8-rc3#n561

We seem to set

	sdkp->zones_max_open = 0;

for host-aware, and

	sdkp->zones_max_open = get_unaligned_be32(&buf[16]);

for host-managed.

So the blk_queue_max_open_zones(q, sdkp->zones_max_open) call in
sd_zbc_read_zones() should already export this new sysfs property as 0
for host-aware disks.

Kind regards,
Niklas

[quoted blkdev.h hunk trimmed down to the !CONFIG_BLK_DEV_ZONED stub]

> > +static inline void blk_queue_max_open_zones(struct request_queue *q,
> > +		unsigned int max_open_zones)
> > +{
> > +}
>
> Why is this one necessary? For the !CONFIG_BLK_DEV_ZONED case, no driver
> should ever call this function.

Will remove in v2.
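The logic Niklas points to amounts to the following, a paraphrased sketch condensed from the v5.8-rc3 sd_zbc.c code behind the links above (not a verbatim copy; buf holds the Zoned Block Device Characteristics VPD page):

	if (sdkp->device->type != TYPE_ZBC) {
		/* Host-aware: the VPD page only reports optimal open
		 * counts, so no maximum number of open zones is recorded. */
		sdkp->zones_max_open = 0;
	} else {
		/* Host-managed: maximum number of open zones. */
		sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
	}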
On 2020/07/02 21:37, Niklas Cassel wrote:
> On Tue, Jun 30, 2020 at 01:49:41AM +0000, Damien Le Moal wrote:
>> Since the standard does not actually explicitly define what the value of
>> the maximum number of open sequential write required zones should be for
>> a host-aware drive, I would suggest to always have the max_open_zones
>> value set to 0 for host-aware disks.
>
> Isn't this already the case?
>
> At least according to the comments:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/scsi/sd_zbc.c?h=v5.8-rc3#n555
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/scsi/sd_zbc.c?h=v5.8-rc3#n561
>
> We seem to set
>
> 	sdkp->zones_max_open = 0;
>
> for host-aware, and
>
> 	sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
>
> for host-managed.
>
> So the blk_queue_max_open_zones(q, sdkp->zones_max_open) call in
> sd_zbc_read_zones() should already export this new sysfs property
> as 0 for host-aware disks.

Oh, yes! You are absolutely right. Forgot about that code :)

Please disregard this comment.

[rest of quoted message trimmed]
diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
index 6a8513af9201..f01cf8530ae4 100644
--- a/Documentation/block/queue-sysfs.rst
+++ b/Documentation/block/queue-sysfs.rst
@@ -117,6 +117,13 @@ Maximum number of elements in a DMA scatter/gather list with integrity
 data that will be submitted by the block layer core to the associated
 block driver.
 
+max_open_zones (RO)
+-------------------
+For zoned block devices (zoned attribute indicating "host-managed" or
+"host-aware"), the sum of zones belonging to any of the zone states:
+EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
+If this value is 0, there is no limit.
+
 max_sectors_kb (RW)
 -------------------
 This is the maximum number of kilobytes that the block layer will allow
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 02643e149d5e..fa42961e9678 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -305,6 +305,11 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
 	return queue_var_show(blk_queue_nr_zones(q), page);
 }
 
+static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_max_open_zones(q), page);
+}
+
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
 {
 	return queue_var_show((blk_queue_nomerges(q) << 1) |
@@ -667,6 +672,11 @@ static struct queue_sysfs_entry queue_nr_zones_entry = {
 	.show = queue_nr_zones_show,
 };
 
+static struct queue_sysfs_entry queue_max_open_zones_entry = {
+	.attr = {.name = "max_open_zones", .mode = 0444 },
+	.show = queue_max_open_zones_show,
+};
+
 static struct queue_sysfs_entry queue_nomerges_entry = {
 	.attr = {.name = "nomerges", .mode = 0644 },
 	.show = queue_nomerges_show,
@@ -765,6 +775,7 @@ static struct attribute *queue_attrs[] = {
 	&queue_nonrot_entry.attr,
 	&queue_zoned_entry.attr,
 	&queue_nr_zones_entry.attr,
+	&queue_max_open_zones_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
@@ -792,6 +803,10 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
 	    (!q->mq_ops || !q->mq_ops->timeout))
 		return 0;
 
+	if (attr == &queue_max_open_zones_entry.attr &&
+	    !blk_queue_is_zoned(q))
+		return 0;
+
 	return attr->mode;
 }
 
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index c08f6281b614..af156529f3b6 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -82,6 +82,7 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
 
 	q->limits.zoned = BLK_ZONED_HM;
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
+	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
 free_data:
 	kfree(id);
 	return status;
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 183a20720da9..aa3564139b40 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 	/* The drive satisfies the kernel restrictions: set it up */
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
 	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
+	if (sdkp->zones_max_open == U32_MAX)
+		blk_queue_max_open_zones(q, 0);
+	else
+		blk_queue_max_open_zones(q, sdkp->zones_max_open);
 	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
 
 	/* READ16/WRITE16 is mandatory for ZBC disks */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8fd900998b4e..2f332f00501d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -520,6 +520,7 @@ struct request_queue {
 	unsigned int		nr_zones;
 	unsigned long		*conv_zones_bitmap;
 	unsigned long		*seq_zones_wlock;
+	unsigned int		max_open_zones;
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 	/*
@@ -729,6 +730,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
 		return true;
 	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
 }
+
+static inline void blk_queue_max_open_zones(struct request_queue *q,
+		unsigned int max_open_zones)
+{
+	q->max_open_zones = max_open_zones;
+}
+
+static inline unsigned int queue_max_open_zones(const struct request_queue *q)
+{
+	return q->max_open_zones;
+}
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
 {
@@ -744,6 +756,14 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 {
 	return 0;
 }
+static inline void blk_queue_max_open_zones(struct request_queue *q,
+		unsigned int max_open_zones)
+{
+}
+static inline unsigned int queue_max_open_zones(const struct request_queue *q)
+{
+	return 0;
+}
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 static inline bool rq_is_sync(struct request *rq)
Add a new max_open_zones definition in the sysfs documentation.
This definition will be common for all devices utilizing the zoned block
device support in the kernel.

Export max open zones according to this new definition for NVMe Zoned
Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
the kernel), and ZBC SCSI devices.

Add the new max_open_zones struct member to the request_queue, rather
than as a queue limit, since this property cannot be split across stacking
drivers.

Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
---
 Documentation/block/queue-sysfs.rst |  7 +++++++
 block/blk-sysfs.c                   | 15 +++++++++++++++
 drivers/nvme/host/zns.c             |  1 +
 drivers/scsi/sd_zbc.c               |  4 ++++
 include/linux/blkdev.h              | 20 ++++++++++++++++++++
 5 files changed, 47 insertions(+)