Message ID | 20230627183629.26571-2-nj.shetty@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v13,1/9] block: Introduce queue limits for copy-offload support | expand |
On 23/06/28 03:40PM, Damien Le Moal wrote: >On 6/28/23 03:36, Nitesh Shetty wrote: >> Add device limits as sysfs entries, >> - copy_offload (RW) >> - copy_max_bytes (RW) >> - copy_max_bytes_hw (RO) >> >> Above limits help to split the copy payload in block layer. >> copy_offload: used for setting copy offload(1) or emulation(0). >> copy_max_bytes: maximum total length of copy in single payload. >> copy_max_bytes_hw: Reflects the device supported maximum limit. >> >> Reviewed-by: Hannes Reinecke <hare@suse.de> >> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> >> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com> >> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> >> --- >> Documentation/ABI/stable/sysfs-block | 33 +++++++++++++++ >> block/blk-settings.c | 24 +++++++++++ >> block/blk-sysfs.c | 63 ++++++++++++++++++++++++++++ >> include/linux/blkdev.h | 12 ++++++ >> include/uapi/linux/fs.h | 3 ++ >> 5 files changed, 135 insertions(+) >> >> diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block >> index c57e5b7cb532..3c97303f658b 100644 >> --- a/Documentation/ABI/stable/sysfs-block >> +++ b/Documentation/ABI/stable/sysfs-block >> @@ -155,6 +155,39 @@ Description: >> last zone of the device which may be smaller. >> >> >> +What: /sys/block/<disk>/queue/copy_offload >> +Date: June 2023 >> +Contact: linux-block@vger.kernel.org >> +Description: >> + [RW] When read, this file shows whether offloading copy to a >> + device is enabled (1) or disabled (0). Writing '0' to this >> + file will disable offloading copies for this device. >> + Writing any '1' value will enable this feature. If the device >> + does not support offloading, then writing 1, will result in an >> + error. > >I am still not convinced that this one is really necessary. copy_max_bytes_hw != >0 indicates that the devices supports copy offload. And setting copy_max_bytes >to 0 can be used to disable copy offload (which probably should be the default >for now). > Agreed, we will do this in next iteration. >> + >> + >> +What: /sys/block/<disk>/queue/copy_max_bytes >> +Date: June 2023 >> +Contact: linux-block@vger.kernel.org >> +Description: >> + [RW] This is the maximum number of bytes that the block layer >> + will allow for a copy request. This will is always smaller or > >will is -> is > acked >> + equal to the maximum size allowed by the hardware, indicated by >> + 'copy_max_bytes_hw'. An attempt to set a value higher than >> + 'copy_max_bytes_hw' will truncate this to 'copy_max_bytes_hw'. >> + >> + >> +What: /sys/block/<disk>/queue/copy_max_bytes_hw > >Nit: In keeping with the spirit of attributes like >max_hw_sectors_kb/max_sectors_kb, I would call this one copy_max_hw_bytes. > acked, will update in next iteration. >> +Date: June 2023 >> +Contact: linux-block@vger.kernel.org >> +Description: >> + [RO] This is the maximum number of bytes that the hardware >> + will allow for single data copy request. >> + A value of 0 means that the device does not support >> + copy offload. >> + >> + >> What: /sys/block/<disk>/queue/crypto/ >> Date: February 2022 >> Contact: linux-block@vger.kernel.org >> diff --git a/block/blk-settings.c b/block/blk-settings.c >> index 4dd59059b788..738cd3f21259 100644 >> --- a/block/blk-settings.c >> +++ b/block/blk-settings.c >> @@ -59,6 +59,8 @@ void blk_set_default_limits(struct queue_limits *lim) >> lim->zoned = BLK_ZONED_NONE; >> lim->zone_write_granularity = 0; >> lim->dma_alignment = 511; >> + lim->max_copy_sectors_hw = 0; >> + lim->max_copy_sectors = 0; >> } >> >> /** >> @@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim) >> lim->max_dev_sectors = UINT_MAX; >> lim->max_write_zeroes_sectors = UINT_MAX; >> lim->max_zone_append_sectors = UINT_MAX; >> + lim->max_copy_sectors_hw = UINT_MAX; >> + lim->max_copy_sectors = UINT_MAX; >> } >> EXPORT_SYMBOL(blk_set_stacking_limits); >> >> @@ -183,6 +187,22 @@ void blk_queue_max_discard_sectors(struct request_queue *q, >> } >> EXPORT_SYMBOL(blk_queue_max_discard_sectors); >> >> +/** >> + * blk_queue_max_copy_sectors_hw - set max sectors for a single copy payload >> + * @q: the request queue for the device >> + * @max_copy_sectors: maximum number of sectors to copy >> + **/ >> +void blk_queue_max_copy_sectors_hw(struct request_queue *q, >> + unsigned int max_copy_sectors) >> +{ >> + if (max_copy_sectors > (COPY_MAX_BYTES >> SECTOR_SHIFT)) >> + max_copy_sectors = COPY_MAX_BYTES >> SECTOR_SHIFT; >> + >> + q->limits.max_copy_sectors_hw = max_copy_sectors; >> + q->limits.max_copy_sectors = max_copy_sectors; >> +} >> +EXPORT_SYMBOL_GPL(blk_queue_max_copy_sectors_hw); >> + >> /** >> * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase >> * @q: the request queue for the device >> @@ -578,6 +598,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, >> t->max_segment_size = min_not_zero(t->max_segment_size, >> b->max_segment_size); >> >> + t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors); >> + t->max_copy_sectors_hw = min(t->max_copy_sectors_hw, >> + b->max_copy_sectors_hw); >> + >> t->misaligned |= b->misaligned; >> >> alignment = queue_limit_alignment_offset(b, start); >> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c >> index afc797fb0dfc..43551778d035 100644 >> --- a/block/blk-sysfs.c >> +++ b/block/blk-sysfs.c >> @@ -199,6 +199,62 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag >> return queue_var_show(0, page); >> } >> >> +static ssize_t queue_copy_offload_show(struct request_queue *q, char *page) >> +{ >> + return queue_var_show(blk_queue_copy(q), page); >> +} >> + >> +static ssize_t queue_copy_offload_store(struct request_queue *q, >> + const char *page, size_t count) >> +{ >> + unsigned long copy_offload; >> + ssize_t ret = queue_var_store(©_offload, page, count); >> + >> + if (ret < 0) >> + return ret; >> + >> + if (copy_offload && !q->limits.max_copy_sectors_hw) >> + return -EINVAL; >> + >> + if (copy_offload) >> + blk_queue_flag_set(QUEUE_FLAG_COPY, q); >> + else >> + blk_queue_flag_clear(QUEUE_FLAG_COPY, q); >> + >> + return count; >> +} > >See above. I think we can drop this attribute. > acked Thank you, Nitesh Shetty
Just a little heads up: I think we need to properly solve the hw vs user limit as the various ad-hoc mechanisms tend to be broken and don't scale. I plan to start looking into that the next days. On Wed, Jun 28, 2023 at 12:06:15AM +0530, Nitesh Shetty wrote: > Add device limits as sysfs entries, > - copy_offload (RW) > - copy_max_bytes (RW) > - copy_max_bytes_hw (RO) > > Above limits help to split the copy payload in block layer. > copy_offload: used for setting copy offload(1) or emulation(0). > copy_max_bytes: maximum total length of copy in single payload. > copy_max_bytes_hw: Reflects the device supported maximum limit. Why do we need the separate copy_offload boolean vs just looking at the max_bytes value?
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h > index b7b56871029c..dff56813f95a 100644 > --- a/include/uapi/linux/fs.h > +++ b/include/uapi/linux/fs.h > @@ -64,6 +64,9 @@ struct fstrim_range { > __u64 minlen; > }; > > +/* maximum copy offload length, this is set to 128MB based on current testing */ > +#define COPY_MAX_BYTES (1 << 27) This should not be in the UAPI. It is a totally arbitrary internal limit.
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index c57e5b7cb532..3c97303f658b 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -155,6 +155,39 @@ Description: last zone of the device which may be smaller. +What: /sys/block/<disk>/queue/copy_offload +Date: June 2023 +Contact: linux-block@vger.kernel.org +Description: + [RW] When read, this file shows whether offloading copy to a + device is enabled (1) or disabled (0). Writing '0' to this + file will disable offloading copies for this device. + Writing any '1' value will enable this feature. If the device + does not support offloading, then writing 1, will result in an + error. + + +What: /sys/block/<disk>/queue/copy_max_bytes +Date: June 2023 +Contact: linux-block@vger.kernel.org +Description: + [RW] This is the maximum number of bytes that the block layer + will allow for a copy request. This will is always smaller or + equal to the maximum size allowed by the hardware, indicated by + 'copy_max_bytes_hw'. An attempt to set a value higher than + 'copy_max_bytes_hw' will truncate this to 'copy_max_bytes_hw'. + + +What: /sys/block/<disk>/queue/copy_max_bytes_hw +Date: June 2023 +Contact: linux-block@vger.kernel.org +Description: + [RO] This is the maximum number of bytes that the hardware + will allow for single data copy request. + A value of 0 means that the device does not support + copy offload. + + What: /sys/block/<disk>/queue/crypto/ Date: February 2022 Contact: linux-block@vger.kernel.org diff --git a/block/blk-settings.c b/block/blk-settings.c index 4dd59059b788..738cd3f21259 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -59,6 +59,8 @@ void blk_set_default_limits(struct queue_limits *lim) lim->zoned = BLK_ZONED_NONE; lim->zone_write_granularity = 0; lim->dma_alignment = 511; + lim->max_copy_sectors_hw = 0; + lim->max_copy_sectors = 0; } /** @@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim) lim->max_dev_sectors = UINT_MAX; lim->max_write_zeroes_sectors = UINT_MAX; lim->max_zone_append_sectors = UINT_MAX; + lim->max_copy_sectors_hw = UINT_MAX; + lim->max_copy_sectors = UINT_MAX; } EXPORT_SYMBOL(blk_set_stacking_limits); @@ -183,6 +187,22 @@ void blk_queue_max_discard_sectors(struct request_queue *q, } EXPORT_SYMBOL(blk_queue_max_discard_sectors); +/** + * blk_queue_max_copy_sectors_hw - set max sectors for a single copy payload + * @q: the request queue for the device + * @max_copy_sectors: maximum number of sectors to copy + **/ +void blk_queue_max_copy_sectors_hw(struct request_queue *q, + unsigned int max_copy_sectors) +{ + if (max_copy_sectors > (COPY_MAX_BYTES >> SECTOR_SHIFT)) + max_copy_sectors = COPY_MAX_BYTES >> SECTOR_SHIFT; + + q->limits.max_copy_sectors_hw = max_copy_sectors; + q->limits.max_copy_sectors = max_copy_sectors; +} +EXPORT_SYMBOL_GPL(blk_queue_max_copy_sectors_hw); + /** * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase * @q: the request queue for the device @@ -578,6 +598,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); + t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors); + t->max_copy_sectors_hw = min(t->max_copy_sectors_hw, + b->max_copy_sectors_hw); + t->misaligned |= b->misaligned; alignment = queue_limit_alignment_offset(b, start); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index afc797fb0dfc..43551778d035 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -199,6 +199,62 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag return queue_var_show(0, page); } +static ssize_t queue_copy_offload_show(struct request_queue *q, char *page) +{ + return queue_var_show(blk_queue_copy(q), page); +} + +static ssize_t queue_copy_offload_store(struct request_queue *q, + const char *page, size_t count) +{ + unsigned long copy_offload; + ssize_t ret = queue_var_store(©_offload, page, count); + + if (ret < 0) + return ret; + + if (copy_offload && !q->limits.max_copy_sectors_hw) + return -EINVAL; + + if (copy_offload) + blk_queue_flag_set(QUEUE_FLAG_COPY, q); + else + blk_queue_flag_clear(QUEUE_FLAG_COPY, q); + + return count; +} + +static ssize_t queue_copy_max_hw_show(struct request_queue *q, char *page) +{ + return sprintf(page, "%llu\n", (unsigned long long) + q->limits.max_copy_sectors_hw << SECTOR_SHIFT); +} + +static ssize_t queue_copy_max_show(struct request_queue *q, char *page) +{ + return sprintf(page, "%llu\n", (unsigned long long) + q->limits.max_copy_sectors << SECTOR_SHIFT); +} + +static ssize_t queue_copy_max_store(struct request_queue *q, + const char *page, size_t count) +{ + unsigned long max_copy; + ssize_t ret = queue_var_store(&max_copy, page, count); + + if (ret < 0) + return ret; + + if (max_copy & (queue_logical_block_size(q) - 1)) + return -EINVAL; + + max_copy >>= SECTOR_SHIFT; + q->limits.max_copy_sectors = min_t(unsigned int, max_copy, + q->limits.max_copy_sectors_hw); + + return count; +} + static ssize_t queue_write_same_max_show(struct request_queue *q, char *page) { return queue_var_show(0, page); @@ -522,6 +578,10 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones"); QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones"); QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones"); +QUEUE_RW_ENTRY(queue_copy_offload, "copy_offload"); +QUEUE_RO_ENTRY(queue_copy_max_hw, "copy_max_bytes_hw"); +QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes"); + QUEUE_RW_ENTRY(queue_nomerges, "nomerges"); QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity"); QUEUE_RW_ENTRY(queue_poll, "io_poll"); @@ -638,6 +698,9 @@ static struct attribute *queue_attrs[] = { &queue_discard_max_entry.attr, &queue_discard_max_hw_entry.attr, &queue_discard_zeroes_data_entry.attr, + &queue_copy_offload_entry.attr, + &queue_copy_max_hw_entry.attr, + &queue_copy_max_entry.attr, &queue_write_same_max_entry.attr, &queue_write_zeroes_max_entry.attr, &queue_zone_append_max_entry.attr, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ed44a997f629..6098665953e6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -309,6 +309,9 @@ struct queue_limits { unsigned int discard_alignment; unsigned int zone_write_granularity; + unsigned int max_copy_sectors_hw; + unsigned int max_copy_sectors; + unsigned short max_segments; unsigned short max_integrity_segments; unsigned short max_discard_segments; @@ -554,6 +557,7 @@ struct request_queue { #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ #define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ +#define QUEUE_FLAG_COPY 32 /* enable/disable device copy offload */ #define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \ (1UL << QUEUE_FLAG_SAME_COMP) | \ @@ -574,6 +578,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) +#define blk_queue_copy(q) test_bit(QUEUE_FLAG_COPY, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) @@ -891,6 +896,8 @@ extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); extern void blk_queue_max_discard_segments(struct request_queue *, unsigned short); +extern void blk_queue_max_copy_sectors_hw(struct request_queue *q, + unsigned int max_copy_sectors); void blk_queue_max_secure_erase_sectors(struct request_queue *q, unsigned int max_sectors); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); @@ -1210,6 +1217,11 @@ static inline unsigned int bdev_discard_granularity(struct block_device *bdev) return bdev_get_queue(bdev)->limits.discard_granularity; } +static inline unsigned int bdev_max_copy_sectors(struct block_device *bdev) +{ + return bdev_get_queue(bdev)->limits.max_copy_sectors; +} + static inline unsigned int bdev_max_secure_erase_sectors(struct block_device *bdev) { diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index b7b56871029c..dff56813f95a 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -64,6 +64,9 @@ struct fstrim_range { __u64 minlen; }; +/* maximum copy offload length, this is set to 128MB based on current testing */ +#define COPY_MAX_BYTES (1 << 27) + /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ #define FILE_DEDUPE_RANGE_SAME 0 #define FILE_DEDUPE_RANGE_DIFFERS 1